Building a RAG from Scratch: A Beginner's Guide (Part 1: The Basic Pipeline)

Hady Walied

Welcome to the first post in our series on building an intelligent question-answering system. In this series, we'll document our journey of creating a powerful Q&A application that can answer questions about your own custom documents. By the end of this series, you'll have a solid understanding of how to build your own Retrieval Augmented Generation (RAG) pipeline from scratch.

The Goal for This Post

By the end of this post, you will have a simple, command-line-only RAG application that can answer questions about the documents in the data directory.

The Core Concepts

Our application is built around the concept of Retrieval Augmented Generation (RAG). RAG is a technique that combines the power of large language models (LLMs) with the ability to retrieve information from a custom knowledge base. It works by first retrieving relevant documents from your knowledge base and then using those documents as context for an LLM to generate an answer.
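Stripped of library details, the whole pipeline boils down to two steps. The sketch below is purely illustrative and not part of the actual app; search_knowledge_base and ask_llm are hypothetical stand-ins for the retriever and LLM client we'll wire up with LangChain in a moment.

def answer_question(question):
    # Step 1 -- Retrieval: find the document chunks most similar to the question.
    relevant_chunks = search_knowledge_base(question)  # hypothetical helper
    # Step 2 -- Generation: hand those chunks to the LLM as context for its answer.
    prompt = f"Context: {relevant_chunks}\n\nQuestion: {question}"
    return ask_llm(prompt)  # hypothetical helper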


The Code

Here is the complete code for our basic RAG application.

requirements.txt

langchain
langchain-community
langchain-openai
faiss-cpu
pdfplumber
python-dotenv
huggingface_hub
sentence-transformers

app/rag_logic.py

from langchain_community.document_loaders.directory import DirectoryLoader
from langchain_community.document_loaders.pdf import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


def load_documents(directory_path):
    """Loads documents (e.g., PDFs) from the data/ folder."""
    loader = DirectoryLoader(
        directory_path, glob="**/*.pdf", loader_cls=PDFPlumberLoader
    )
    return loader.load()


def split_documents(documents):
    """Chunks the documents into smaller pieces."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(documents)


def create_and_store_embeddings(chunks, vector_store_path="vector_store"):
    """Initializes the HuggingFaceEmbeddings, creates embeddings for the chunks, builds a FAISS vector store, and saves it to the vector_store/ directory."""
    embeddings = HuggingFaceEmbeddings()
    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(vector_store_path)


def load_retriever(vector_store_path="vector_store"):
    """Loads the saved FAISS index and returns it as a LangChain retriever."""
    embeddings = HuggingFaceEmbeddings()
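    # FAISS.load_local unpickles the saved docstore, so LangChain requires this
    # explicit opt-in; only enable it for index files you created yourself.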
    vector_store = FAISS.load_local(vector_store_path, embeddings, allow_dangerous_deserialization=True)
    return vector_store.as_retriever()


def create_rag_chain(retriever, local_llm_url, api_key="lm-studio"):
    """Sets up the connection to your LM Studio server and creates the final retrieval chain."""

    template = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Question: {input}
    Context: {context}
    Answer:
    """
    prompt = PromptTemplate(template=template, input_variables=["input", "context"])

    llm = OpenAI(base_url=local_llm_url, api_key=api_key)

    document_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(retriever, document_chain)

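Once the vector store has been built, you can sanity-check the chain from a Python shell. This is a hypothetical quick test: the question string is a placeholder, and the URL assumes LM Studio's default port with its /v1 API path.

from app.rag_logic import load_retriever, create_rag_chain

retriever = load_retriever("vector_store")
rag_chain = create_rag_chain(retriever, "http://localhost:1234/v1")

# create_retrieval_chain returns a dict with "input", "context" (the retrieved
# Documents), and "answer" (the LLM's generated response).
result = rag_chain.invoke({"input": "What is this document about?"})
print(result["answer"])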

app/main.py

import os
from dotenv import load_dotenv

from app.rag_logic import (
    load_documents,
    split_documents,
    create_and_store_embeddings,
    load_retriever,
    create_rag_chain,
)

load_dotenv()

if __name__ == "__main__":
    vectordb_path = os.getenv("VECTOR_DB_DIR", "vector_store")
    data_dir = os.getenv("DATA_DIR", "data")

    if not os.path.exists(vectordb_path) or not os.listdir(vectordb_path):
        print("Vector store not found, creating one...")
        documents = load_documents(data_dir)
        chunks = split_documents(documents)
        create_and_store_embeddings(chunks, vectordb_path)
    else:
        print("Loading existing vector store...")

    retriever = load_retriever(vectordb_path)
    local_llm_url = os.getenv("LOCAL_LLM_URL")
    if not local_llm_url:
        raise ValueError("LOCAL_LLM_URL environment variable not set.")
    rag_chain = create_rag_chain(retriever, local_llm_url)

    while True:
        question = input("Ask a question (or type 'quit' to exit): ")
        if question.strip().lower() == 'quit':
            break
        # The retrieval chain fills in "context" itself from the retriever,
        # so the question is the only input we need to pass.
        response = rag_chain.invoke({"input": question})
        print("Answer:", response["answer"])


How to Run the Code

  1. Install the dependencies:

    pip install -r requirements.txt
    
  2. Create a .env file:

    Create a .env file in the root of the project and add the following environment variables. Note the /v1 suffix on the URL: LM Studio serves its OpenAI-compatible API under that path.

    LOCAL_LLM_URL=http://localhost:1234/v1
    VECTOR_DB_DIR=vector_store
    DATA_DIR=data
    
  3. Run the application from the project root, as a module, so the app package imports resolve:

    python -m app.main
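If the app hangs or errors when it calls the model, first confirm the LM Studio server is actually up. Here is a minimal check, assuming LM Studio is running on its default port (the openai package is already installed as a dependency of langchain-openai):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
print(client.models.list())  # should list the model loaded in LM Studio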
    

What's Next

In the next post, we'll turn our command-line application into a web API using FastAPI. Stay tuned!

Full implementation: hadywalied/AskAttentionAI
