vectordb
npx skills add https://github.com/lobbi-docs/claude --skill vectordb
Vector Database Skill
Provides comprehensive vector database capabilities for the Golden Armada AI Agent Fleet Platform.
When to Use This Skill
Activate this skill when working with:
- Semantic search implementation
- RAG (Retrieval Augmented Generation)
- Embedding storage and retrieval
- Similarity search (see the cosine-similarity sketch after this list)
- Vector index management
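Every example below uses cosine similarity as the distance metric. As a quick refresher, here is a minimal, self-contained sketch of the computation the databases in this skill perform at scale (the function name is illustrative, not part of any library):

```python
import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine similarity: dot product divided by the product of the
    vector magnitudes. Ranges from -1 to 1; higher means more similar."""
    a, b = np.asarray(a), np.asarray(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
```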
Embedding Generation
```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Single embedding
def get_openai_embedding(text: str) -> list[float]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return response.data[0].embedding

# Batch embeddings (one API call for many texts)
def get_batch_embeddings(texts: list[str]) -> list[list[float]]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
```
Pinecone
```python
import os

from pinecone import Pinecone, ServerlessSpec

# Initialize client
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create a serverless index (1536 dims matches text-embedding-3-small)
pc.create_index(
    name="agents",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-west-2"),
)

# Get an index handle
index = pc.Index("agents")

# Upsert vectors
index.upsert(
    vectors=[
        {
            "id": "agent-1",
            "values": embedding,
            "metadata": {
                "name": "Claude Agent",
                "type": "claude",
                "description": "General assistant",
            },
        }
    ],
    namespace="production",
)

# Query with a metadata filter
results = index.query(
    vector=query_embedding,
    top_k=10,
    include_metadata=True,
    namespace="production",
    filter={"type": {"$eq": "claude"}},
)

for match in results.matches:
    print(f"{match.id}: {match.score} - {match.metadata}")

# Delete
index.delete(ids=["agent-1"], namespace="production")
```
Chroma
```python
import chromadb

# Initialize a persistent client
client = chromadb.PersistentClient(path="./chroma_db")

# Create (or fetch) a collection using cosine distance
collection = client.get_or_create_collection(
    name="agents",
    metadata={"hnsw:space": "cosine"},
)

# Add documents
collection.add(
    ids=["agent-1", "agent-2"],
    embeddings=[embedding1, embedding2],
    documents=["Document 1 text", "Document 2 text"],
    metadatas=[
        {"type": "claude", "version": "3"},
        {"type": "gpt", "version": "4"},
    ],
)

# Query with a metadata filter
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=10,
    where={"type": "claude"},
    include=["documents", "metadatas", "distances"],
)

# Update metadata in place
collection.update(
    ids=["agent-1"],
    metadatas=[{"type": "claude", "version": "3.5"}],
)

# Delete
collection.delete(ids=["agent-1"])
```
pgvector (PostgreSQL)
```sql
-- Enable extension
CREATE EXTENSION vector;

-- Create table
CREATE TABLE documents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    content TEXT NOT NULL,
    embedding vector(1536),
    metadata JSONB DEFAULT '{}'
);

-- Create index (IVFFlat for larger datasets)
CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);

-- Or HNSW for better recall
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);

-- Insert
INSERT INTO documents (content, embedding, metadata)
VALUES ('Document content', '[0.1, 0.2, ...]', '{"type": "manual"}');

-- Similarity search (<=> is cosine distance, so 1 - distance = similarity)
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
ORDER BY embedding <=> $1
LIMIT 10;

-- With a metadata filter
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
WHERE metadata->>'type' = 'manual'
ORDER BY embedding <=> $1
LIMIT 10;
```
Python with pgvector
```python
import uuid

from pgvector.sqlalchemy import Vector
from sqlalchemy import Column, String, select
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = 'documents'

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    content = Column(String, nullable=False)
    embedding = Column(Vector(1536))
    # "metadata" is reserved on declarative models, so map the column explicitly
    doc_metadata = Column("metadata", JSONB, default=dict)

# Insert
doc = Document(
    content="Document content",
    embedding=embedding,
    doc_metadata={"type": "manual"},
)
session.add(doc)
session.commit()

# Query: order by cosine distance to the query embedding
results = session.execute(
    select(Document)
    .order_by(Document.embedding.cosine_distance(query_embedding))
    .limit(10)
).scalars().all()
```
RAG Implementation
```python
class RAGService:
    def __init__(self, vector_store, llm_client):
        self.vector_store = vector_store
        self.llm = llm_client

    async def query(self, question: str, top_k: int = 5) -> str:
        # 1. Generate query embedding
        query_embedding = await self.get_embedding(question)

        # 2. Retrieve relevant documents
        docs = await self.vector_store.search(
            embedding=query_embedding,
            top_k=top_k
        )

        # 3. Build context
        context = "\n\n".join([
            f"Document {i+1}:\n{doc.content}"
            for i, doc in enumerate(docs)
        ])

        # 4. Generate response
        prompt = f"""Answer the question based on the following context.

Context: {context}

Question: {question}

Answer:"""
        response = await self.llm.generate(prompt)
        return response

    async def add_document(self, content: str, metadata: dict | None = None):
        metadata = metadata or {}

        # Chunk document
        chunks = self.chunk_text(content)

        # Generate embeddings
        embeddings = await self.get_batch_embeddings(chunks)

        # Store each chunk under a stable, per-chunk ID
        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
            await self.vector_store.upsert(
                id=f"{metadata.get('doc_id', 'doc')}-{i}",
                embedding=embedding,
                content=chunk,
                metadata=metadata
            )

    def chunk_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
        # Fixed-size chunks with overlap, so context isn't cut mid-thought
        chunks = []
        start = 0
        while start < len(text):
            chunks.append(text[start:start + chunk_size])
            start += chunk_size - overlap
        return chunks
```
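A minimal sketch of wiring the service together. `my_vector_store`, `my_llm_client`, and `long_document_text` are placeholders, assuming adapters that expose the async `search`/`upsert` and `generate` methods the class calls above, not any specific library:

```python
import asyncio

async def main():
    # Application-provided adapters satisfying the RAGService interface
    rag = RAGService(vector_store=my_vector_store, llm_client=my_llm_client)

    # Ingest a document: it is chunked, embedded, and stored
    await rag.add_document(long_document_text, metadata={"doc_id": "manual-1"})

    # Ask a question grounded in the stored chunks
    answer = await rag.query("How do I rotate API keys?", top_k=5)
    print(answer)

asyncio.run(main())
```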
Best Practices
- Choose appropriate index type (HNSW for recall, IVFFlat for scale)
- Chunk documents appropriately (typically 500-1000 tokens)
- Include overlap between chunks (10-20%)
- Store metadata for filtering
- Use namespaces/collections to organize data
- Monitor query latency and index performance
- Batch operations for bulk inserts (see the sketch below)
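For the last point, a minimal sketch of batched upserts against the Pinecone `index` handle and record format from the Pinecone section above; the batch size of 100 is a common starting point, not a hard limit:

```python
def batched_upsert(index, vectors: list[dict], namespace: str, batch_size: int = 100):
    """Upsert records in fixed-size batches instead of one call per vector.

    Each request carries up to `batch_size` records, cutting round trips
    while keeping individual request payloads small.
    """
    for start in range(0, len(vectors), batch_size):
        batch = vectors[start:start + batch_size]
        index.upsert(vectors=batch, namespace=namespace)
```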