A RAG (Retrieval-Augmented Generation) chatbot answers questions based on your own documents — not just its training data. This guide builds one from scratch using Python, ChromaDB, and Claude.
Originally published at kalyna.pro
What Is RAG?
RAG combines two things:
- Retrieval: search your documents for relevant chunks
- Generation: use an LLM to write an answer based on those chunks
Without RAG, Claude can only answer questions based on its training data. With RAG, you inject relevant context directly into the prompt.
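In practice that "injection" is just string formatting: the retrieved text gets pasted into the prompt ahead of the question. A tiny illustration, with a made-up chunk and question:

retrieved_chunks = ["Refunds are processed within 14 days of purchase."]  # would come from a document search
context = "\n\n".join(retrieved_chunks)
prompt = f"Answer using ONLY the context below.\n\nContext:\n{context}\n\nQuestion: What is the refund window?"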
Architecture
- Indexing: load docs → split into chunks → embed → store in vector DB
- Querying: embed question → find similar chunks → send to Claude → return answer
Step 1: Install Dependencies
pip install anthropic chromadb sentence-transformers pypdf2
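A quick throwaway check that the installs worked before going further:

# Each import corresponds to one of the packages installed above.
import anthropic, chromadb, sentence_transformers, PyPDF2
print("dependencies OK")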
Step 2: Load and Chunk Documents
import PyPDF2
from pathlib import Path

def load_pdf(path: str) -> str:
    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        return "\n".join(page.extract_text() for page in reader.pages)
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    # Sliding window over words: each chunk repeats the last `overlap` words of the
    # previous one, so a sentence cut at a boundary still appears intact somewhere.
    words = text.split()
    chunks = []
    i = 0
    while i < len(words):
        chunks.append(" ".join(words[i : i + chunk_size]))
        i += chunk_size - overlap  # overlap must stay smaller than chunk_size
    return chunks
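To sanity-check the chunker, run it against any document you have on hand (company_handbook.pdf here is just a placeholder name, reused from the Usage section later):

text = load_pdf("company_handbook.pdf")
chunks = chunk_text(text)
print(len(chunks), "chunks")    # count depends entirely on the document
print(chunks[0][:200])          # peek at the start of the first chunk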
Step 3: Build the Vector Index
import chromadb
from chromadb.utils import embedding_functions

# Local embedding model; chunks are embedded when added and questions when queried.
embed_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)
client = chromadb.PersistentClient(path="./chroma_db")

def build_index(documents: list[dict], collection_name: str = "docs"):
    collection = client.get_or_create_collection(
        name=collection_name,
        embedding_function=embed_fn,
    )
    texts, ids, metadatas = [], [], []
    for doc in documents:
        for j, chunk in enumerate(chunk_text(doc["text"])):
            texts.append(chunk)
            ids.append(f"{doc['source']}_chunk_{j}")
            metadatas.append({"source": doc["source"]})
    collection.add(documents=texts, ids=ids, metadatas=metadatas)
    return collection
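Putting Steps 2 and 3 together, indexing a couple of files could look like this (the file names are placeholders):

documents = [
    {"source": "company_handbook.pdf", "text": load_pdf("company_handbook.pdf")},
    {"source": "product_docs.txt", "text": Path("product_docs.txt").read_text()},
]
collection = build_index(documents)
print(collection.count(), "chunks stored")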
Step 4: Query + Generate Answer
import anthropic

claude = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

def retrieve(question: str, collection, n_results: int = 5) -> list[str]:
    # ChromaDB embeds the question with the same model and returns the nearest chunks.
    return collection.query(query_texts=[question], n_results=n_results)["documents"][0]

def generate_answer(question: str, chunks: list[str]) -> str:
    context = "\n\n---\n\n".join(chunks)
    response = claude.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": f"Answer using ONLY the context below.\n\nContext:\n{context}\n\nQuestion: {question}"
        }],
    )
    return response.content[0].text
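Wired together, one question flows through both functions. This assumes the collection returned by build_index in Step 3; the question is only a sample:

question = "What is the refund policy?"
chunks = retrieve(question, collection)
print(generate_answer(question, chunks))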
Step 5: Full Chatbot Script
import sys
import anthropic
import chromadb
from chromadb.utils import embedding_functions
import PyPDF2
from pathlib import Path

embed_fn = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
chroma = chromadb.PersistentClient(path="./chroma_db")
claude = anthropic.Anthropic()

def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    # Same sliding-window chunker as Step 2.
    words = text.split()
    chunks = []
    i = 0
    while i < len(words):
        chunks.append(" ".join(words[i : i + chunk_size]))
        i += chunk_size - overlap
    return chunks

def index_files(file_paths: list[str]) -> None:
    collection = chroma.get_or_create_collection("docs", embedding_function=embed_fn)
    texts, ids, metas = [], [], []
    for path in file_paths:
        p = Path(path)
        if p.suffix == ".pdf":
            with open(p, "rb") as f:
                text = "\n".join(page.extract_text() for page in PyPDF2.PdfReader(f).pages)
        else:
            text = p.read_text()
        for j, chunk in enumerate(chunk_text(text)):
            texts.append(chunk)
            ids.append(f"{p.name}_{j}")
            metas.append({"source": path})
    collection.add(documents=texts, ids=ids, metadatas=metas)
    print(f"Indexed {len(texts)} chunks")

def ask(question: str) -> str:
    collection = chroma.get_collection("docs", embedding_function=embed_fn)
    chunks = collection.query(query_texts=[question], n_results=5)["documents"][0]
    context = "\n\n---\n\n".join(chunks)
    response = claude.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=1024,
        messages=[{"role": "user", "content": f"Answer using ONLY the context below.\n\nContext:\n{context}\n\nQuestion: {question}"}],
    )
    return response.content[0].text

if __name__ == "__main__":
    if len(sys.argv) > 1:
        index_files(sys.argv[1:])
    else:
        print("RAG Chatbot ready. Type 'quit' to exit.")
        while True:
            q = input("You: ").strip()
            if q.lower() in ("quit", "exit"):
                break
            print("Bot:", ask(q), "\n")
Usage
# Index documents
python chatbot.py company_handbook.pdf product_docs.txt
# Start chatting
python chatbot.py
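The same functions work from other Python code, not just the CLI, assuming the script above is saved as chatbot.py (the question below is only a sample):

from chatbot import index_files, ask

index_files(["company_handbook.pdf", "product_docs.txt"])
print(ask("How many vacation days do new employees get?"))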
Performance Tips
- Chunk size: 300–600 words is a good starting range
- Increase n_results for complex questions
- Metadata filtering: pass where={"source": "specific.pdf"} to collection.query to restrict the search
- Reranking: use a cross-encoder reranker for production quality (both tips are sketched after this list)
- Caching: cache embeddings to speed up re-indexing
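Here is a rough sketch of the filtering and reranking tips, reusing the collection and question from earlier. The where argument is a standard ChromaDB query parameter; the cross-encoder model named below is one common choice, not a requirement:

from sentence_transformers import CrossEncoder

# Over-retrieve from a single source file, then rerank down to the best 5 chunks.
results = collection.query(
    query_texts=[question],
    n_results=20,
    where={"source": "specific.pdf"},   # restrict search to one document
)
candidates = results["documents"][0]

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
scores = reranker.predict([(question, c) for c in candidates])
top_chunks = [c for _, c in sorted(zip(scores, candidates), reverse=True)[:5]]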
Summary
You built a RAG chatbot that:
- Loads PDF and text documents
- Splits into overlapping chunks
- Embeds and stores in ChromaDB
- Retrieves relevant chunks per question
- Generates grounded answers with Claude