How to Choose a Vector Database
Vector DB selection affects the entire system architecture. You need to weigh existing infrastructure, scale requirements, accuracy needs, and operational costs.
| DB | Characteristics | Best For |
|---|---|---|
| pgvector | PostgreSQL integration | Existing PG infrastructure, small-medium scale |
| Qdrant | High performance, Rust | Large scale, high throughput |
| Weaviate | GraphQL, schema management | Structured data + vectors |
| Chroma | Embedded, dev-friendly | Prototypes, local dev |
The first criterion is how easily the database integrates with your existing stack.
pgvector: Shortest Path to Integration
-- Enable the pgvector extension (provides the vector type and distance operators).
CREATE EXTENSION IF NOT EXISTS vector;
-- One row per document chunk. 1536 dims matches e.g. OpenAI embedding models —
-- confirm against the embedding model actually in use.
CREATE TABLE embeddings (
id BIGSERIAL PRIMARY KEY,
content TEXT NOT NULL,
metadata JSONB DEFAULT '{}',
embedding vector(1536),
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- HNSW index (high accuracy, more memory)
-- vector_cosine_ops makes the index serve the <=> (cosine distance) operator;
-- m / ef_construction trade build time and memory for recall.
CREATE INDEX ON embeddings USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
-- Similarity search
-- <=> returns cosine DISTANCE, so similarity = 1 - distance;
-- ORDER BY the raw distance ascending is what lets the HNSW index be used.
SELECT id, content, metadata,
1 - (embedding <=> $1::vector) AS similarity
FROM embeddings
ORDER BY embedding <=> $1::vector
LIMIT 10;
Python usage:
import asyncpg
from pgvector.asyncpg import register_vector
class PgVectorStore:
    """Thin async wrapper around a pgvector-backed ``embeddings`` table."""

    def __init__(self, conn: asyncpg.Connection):
        # Connection is owned by the caller; this class only issues queries on it.
        self.conn = conn

    async def search(self, query_embedding: list[float], limit: int = 5) -> list[dict]:
        """Return the ``limit`` rows nearest to ``query_embedding`` by cosine distance.

        Each result dict carries id, content, metadata and a ``similarity``
        column computed as 1 - cosine distance.
        """
        query = """
SELECT id, content, metadata,
1 - (embedding <=> $1::vector) AS similarity
FROM embeddings
ORDER BY embedding <=> $1::vector
LIMIT $2
"""
        records = await self.conn.fetch(query, query_embedding, limit)
        return list(map(dict, records))
Qdrant: Large-Scale High-Speed Vector Search
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
import uuid
# Module-level shared async client; 6333 is Qdrant's default REST port.
# Assumes a locally running Qdrant instance — adjust the URL for deployment.
client = AsyncQdrantClient(url="http://localhost:6333")
async def upsert_documents(collection_name: str, documents: list[dict], embeddings: list[list[float]]):
    """Upsert one Qdrant point per document, pairing docs with their embeddings.

    Each point gets a fresh random UUID id and a payload holding the document's
    content plus its (possibly empty) tags list.
    """
    batch = []
    for doc, vec in zip(documents, embeddings):
        payload = {"content": doc["content"], "tags": doc.get("tags", [])}
        batch.append(PointStruct(id=str(uuid.uuid4()), vector=vec, payload=payload))
    await client.upsert(collection_name=collection_name, points=batch)
async def filtered_search(collection_name: str, query_embedding: list[float], tag_filter: str | None = None, limit: int = 5):
    """Vector search, optionally restricted to points tagged with ``tag_filter``.

    A falsy ``tag_filter`` (None or empty string) disables payload filtering.
    Returns plain dicts: id, score, and the point's payload flattened in.
    """
    condition = (
        Filter(must=[FieldCondition(key="tags", match=MatchValue(value=tag_filter))])
        if tag_filter
        else None
    )
    hits = await client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        query_filter=condition,
        limit=limit,
        with_payload=True,
    )
    return [{"id": hit.id, "score": hit.score, **hit.payload} for hit in hits]
Abstraction Layer for Multiple Vector DBs
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class SearchResult:
    """Backend-agnostic search hit returned by VectorStore implementations."""

    # Document identifier, normalized to str (backends may use ints or UUIDs natively).
    id: str
    # Stored document text.
    content: str
    # Similarity score as reported by the backend — presumably higher means
    # more similar, but the exact scale depends on the backend's metric; confirm.
    score: float
    # Arbitrary per-document payload.
    metadata: dict
class VectorStore(ABC):
    """Minimal async interface every vector-store backend must implement."""

    @abstractmethod
    async def insert(self, content: str, embedding: list[float], metadata: dict) -> str:
        """Persist one document with its embedding; return the new document's id."""
        ...

    @abstractmethod
    async def search(self, query_embedding: list[float], limit: int = 5, filters: dict | None = None) -> list[SearchResult]:
        """Return up to ``limit`` results for the query embedding.

        NOTE(review): expected to be ordered most-similar first by convention —
        confirm in each concrete implementation.
        """
        ...

    @abstractmethod
    async def delete(self, doc_id: str) -> bool:
        """Delete a document by id; returns a success flag (presumably True when
        a document was actually removed — confirm in implementations)."""
        ...
def create_vector_store(backend: str, **kwargs) -> VectorStore:
if backend == "pgvector":
return PgVectorStoreImpl(**kwargs)
elif backend == "qdrant":
return QdrantStoreImpl(**kwargs)
raise ValueError(f"Unknown backend: {backend}")
Index Strategy and Performance Tuning
-- pgvector HNSW tuning (SET applies to the current session only; use
-- ALTER SYSTEM / postgresql.conf for persistent changes)
SET hnsw.ef_search = 100; -- Default 40, higher = better accuracy, slower speed
SET max_parallel_workers_per_gather = 4; -- allow parallel workers per query
SET maintenance_work_mem = '2GB'; -- For large index builds (CREATE INDEX memory)
async def bulk_insert_optimized(conn, records: list[tuple], batch_size: int = 1000) -> int:
    """Bulk-load (content, embedding, metadata) tuples into ``embeddings`` via COPY.

    Args:
        conn: asyncpg connection — presumably with the pgvector codec
            registered so embedding lists encode correctly; TODO confirm.
        records: tuples in (content, embedding, metadata) order, matching the
            ``columns`` list passed to COPY below.
        batch_size: rows per COPY round-trip; must be positive.

    Returns:
        Total number of rows written.

    Raises:
        ValueError: if ``batch_size`` is not positive. (Previously a negative
        value silently returned 0 because the range was empty.)
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    total = 0
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        # copy_records_to_table is ~10x faster than row-by-row INSERTs.
        # Pass the slice straight through: rebuilding each tuple with
        # [(c, e, m) for c, e, m in batch] was a pointless re-materialization.
        await conn.copy_records_to_table(
            "embeddings",
            records=batch,
            columns=["content", "embedding", "metadata"],
        )
        total += len(batch)
    return total
There's no single "right" vector DB — start simply with pgvector, then migrate to Qdrant once scale requirements become clear.
This article is from the Claude Code Complete Guide (7 chapters) on note.com.
myouga (@myougatheaxo) - VTuber axolotl. Sharing practical AI development tips.
Top comments (0)