This article was originally published on AI Study Room. For the full version with working code examples and related articles, visit the original post.
Building RAG From Scratch: A 200-Line Implementation Without Frameworks
RAG Without Frameworks
Retrieval-Augmented Generation (RAG) is the most practical AI pattern of the decade: give an LLM access to your documents so it can answer questions grounded in your data. While LangChain and LlamaIndex make RAG easy to prototype, they add abstractions that hide what's actually happening. Building RAG from scratch — with raw API calls, a vector database, and ~200 lines of code — gives you complete control and a deeper understanding. Here's how.
The RAG Pipeline (Five Steps)
┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐
│ 1. Load │───▶│ 2. Chunk │───▶│ 3. Embed │───▶│ 4. Store │───▶│ 5. Query │
│Documents │ │Documents │ │ Chunks │ │ Vectors │ │ & Generate│
└──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘
Step 1: Load Documents
# Minimal document loader — supports .txt, .md, .pdf
import pathlib
import pypdf # for PDF support
def load_documents(path: str) -> list[dict]:
    """Recursively load .txt, .md and .pdf files under *path*.

    Returns a list of ``{"text": ..., "source": ...}`` dicts, one per file.
    Files with any other extension are silently skipped.
    """
    docs = []
    for file in pathlib.Path(path).rglob("*"):
        if file.suffix in (".txt", ".md"):
            # Both plain-text formats share the same loading path.
            docs.append({"text": file.read_text(encoding="utf-8"), "source": str(file)})
        elif file.suffix == ".pdf":
            reader = pypdf.PdfReader(file)
            # extract_text() can return None/"" for image-only pages;
            # coerce to "" so the join never fails.
            text = "\n".join((page.extract_text() or "") for page in reader.pages)
            docs.append({"text": text, "source": str(file)})
    return docs
Step 2: Chunk Documents
def chunk_document(text: str, chunk_size=500, overlap=50) -> list[str]:
"""Split text into overlapping chunks, respecting paragraph boundaries."""
paragraphs = text.split("
")
chunks = []
current = ""
for para in paragraphs:
if len(current) + len(para) <= chunk_size:
current += para + "
"
else:
if current:
chunks.append(current.strip())
# If a single paragraph exceeds chunk_size, split by sentences
if len(para) > chunk_size:
sentences = para.replace(". ", ".|").split("|")
sub = ""
for s in sentences:
if len(sub) + len(s) <= chunk_size:
sub += s + ". "
else:
chunks.append(sub.strip())
sub = s + ". "
current = sub + "
"
else:
current = para + "
"
if current.strip():
chunks.append(current.strip())
return chunks
Step 3: Generate Embeddings
from openai import OpenAI

# Module-level client shared by all embedding calls below.
# NOTE(review): OpenAI() with no arguments presumably reads the
# OPENAI_API_KEY environment variable — confirm key configuration
# in the deployment environment.
client = OpenAI()
def embed_batch(texts: list[str], model="text-embedding-3-small") -> list[list[float]]:
    """Embed *texts* in one API call; returns one vector per input text,
    in the same order as the inputs."""
    response = client.embeddings.create(input=texts, model=model)
    return [item.embedding for item in response.data]
# For self-hosted: use sentence-transformers
# from sentence_transformers import SentenceTransformer
# model = SentenceTransformer("BAAI/bge-m3")
# embeddings = model.encode(texts, normalize_embeddings=True)
Step 4: Store in Vector Database
import psycopg
import numpy as np
def setup_pgvector():
    """Create (if absent) the pgvector extension, the documents table and
    its HNSW index, and return the open connection.

    Caller owns the connection: NOTE(review) — no commit is issued here,
    so the DDL is persisted only by a later commit on the same connection
    (e.g. insert_documents); confirm this is intended.
    """
    conn = psycopg.connect("postgresql://localhost/rag_demo")
    conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    # vector(1536) matches the dimensionality of text-embedding-3-small.
    conn.execute("""
        CREATE TABLE IF NOT EXISTS documents (
            id SERIAL PRIMARY KEY,
            content TEXT,
            source TEXT,
            embedding vector(1536)
        )
    """)
    # HNSW index for approximate nearest-neighbour search under cosine
    # distance (vector_cosine_ops pairs with the <=> operator used below).
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_embedding
        ON documents USING hnsw (embedding vector_cosine_ops)
    """)
    return conn
def insert_documents(conn, chunks, sources, embeddings):
    """Insert one row per (chunk, source, embedding) triple, then commit.

    The three sequences are consumed in lockstep; extras beyond the
    shortest sequence are ignored (zip semantics).
    """
    sql = "INSERT INTO documents (content, source, embedding) VALUES (%s, %s, %s)"
    for row in zip(chunks, sources, embeddings):
        conn.execute(sql, row)
    conn.commit()
Step 5: Query and Generate
def retrieve(conn, query: str, k: int = 5) -> list[dict]:
    """Embed *query* and return the k nearest chunks by cosine distance.

    Each result is a dict with ``content``, ``source`` and ``similarity``
    (1 - cosine distance, higher is more similar).
    """
    query_embedding = embed_batch([query])[0]
    # <=> is pgvector's cosine-distance operator; ordering by it ascending
    # yields nearest neighbours first via the HNSW index.
    results = conn.execute("""
        SELECT content, source,
               1 - (embedding <=> %s::vector) AS similarity
        FROM documents
        ORDER BY embedding <=> %s::vector
        LIMIT %s
    """, (query_embedding, query_embedding, k)).fetchall()
    # NOTE(review): the source article is truncated mid-return; this list
    # comprehension was reconstructed from the SELECT column order — verify
    # against the original full article.
    return [
        {"content": row[0], "source": row[1], "similarity": row[2]}
        for row in results
    ]
Read the full article on AI Study Room for complete code examples, comparison tables, and related resources.
Found this useful? Check out more developer guides and tool comparisons on AI Study Room.
Top comments (0)