DEV Community

matias yoon
matias yoon

Posted on

RAG 시스템 실전 구축 (v19)

RAG 시스템 실전 구축 (v19)

1. RAG 시스템 기본 개념

RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 LLM이 외부 지식을 검색해 활용함으로써 더 정확하고 최신의 정보를 제공할 수 있게 하는 기법입니다. 핵심 루프는 다음과 같습니다:

사용자 질문 → 검색 (Retrieval) → 증강 (Augmentation) → 생성 (Generation)
Enter fullscreen mode Exit fullscreen mode

RAG 루프 구조

# 단순 RAG 루프 예시
class SimpleRAG:
    """Minimal retrieve-augment-generate loop.

    The three collaborators are duck-typed:

    * ``embedder.encode(question)`` returns the query embedding,
    * ``vector_db.search(embedding, k=...)`` returns the retrieved documents,
    * ``generator.generate(question, context)`` returns the final answer.
    """

    def __init__(self, embedder, vector_db, generator):
        self.embedder = embedder
        self.vector_db = vector_db
        self.generator = generator

    def query(self, question, k=5):
        """Answer ``question`` using the ``k`` most relevant documents.

        Args:
            question: The user question; passed to the embedder and generator.
            k: Number of documents requested from the vector store.

        Returns:
            Whatever ``generator.generate`` returns.
        """
        # 1. Embed the question.
        query_embedding = self.embedder.encode(question)

        # 2. Retrieve.
        relevant_docs = self.vector_db.search(query_embedding, k=k)

        # 3. Augment (build the context). A search result may be either an
        #    object exposing ``.content`` or a plain string, so fall back to
        #    the item itself when it has no ``content`` attribute.
        context = " ".join(getattr(doc, "content", doc) for doc in relevant_docs)

        # 4. Generate.
        return self.generator.generate(question, context)
Enter fullscreen mode Exit fullscreen mode

2. 청킹 전략 (Chunking Strategies)

2.1 의미적 청킹 (Semantic Chunking)

from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def chunk_by_semantic(self, text, threshold=0.75):
        """Split ``text`` into chunks of semantically similar sentences.

        A sentence joins the current chunk while its cosine similarity to the
        embedding of the chunk's *first* sentence is strictly greater than
        ``threshold``; otherwise it starts a new chunk.

        Args:
            text: The text to chunk.
            threshold: Similarity above which a sentence stays in the chunk.

        Returns:
            A list of chunk strings (sentences joined with a single space).
            Empty or whitespace-only text yields an empty list.
        """
        sentences = self.split_into_sentences(text)
        if not sentences:
            # Nothing to embed; avoids an IndexError on sentences[0].
            return []
        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = [sentences[0]]
        current_embedding = embeddings[0]

        for sentence, embedding in zip(sentences[1:], embeddings[1:]):
            if self.cosine_similarity(current_embedding, embedding) > threshold:
                current_chunk.append(sentence)
            else:
                chunks.append(" ".join(current_chunk))
                current_chunk = [sentence]
                # The anchor embedding is only replaced when a chunk starts.
                current_embedding = embedding

        chunks.append(" ".join(current_chunk))
        return chunks

    def split_into_sentences(self, text):
        """Split ``text`` into stripped, non-empty sentences.

        Splits after sentence-ending punctuation (ASCII and full-width)
        followed by whitespace, and on line breaks.
        """
        import re

        pieces = re.split(r"(?<=[.!?。！？])\s+|\n+", text)
        return [piece.strip() for piece in pieces if piece.strip()]

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of ``a`` and ``b`` (0.0 if either is a zero vector)."""
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            # Avoid a division by zero (NaN) for all-zero embeddings.
            return 0.0
        return np.dot(a, b) / denominator
Enter fullscreen mode Exit fullscreen mode

2.2 재귀적 청킹 (Recursive Chunking)

class RecursiveChunker:
    """Split text on progressively finer separators until chunks fit a size limit."""

    def __init__(self, max_chunk_size=512):
        self.max_chunk_size = max_chunk_size

    def recursive_chunk(self, text, separators=None):
        """Split ``text`` into chunks of at most ``max_chunk_size`` characters.

        Args:
            text: The text to split.
            separators: Separators to try, coarsest first. Defaults to
                ``["\\n\\n", "\\n", " ", ""]``. An empty-string separator
                means "cut into fixed-width slices".

        Returns:
            A list of chunks. Text that already fits is returned unchanged as
            a single chunk; otherwise chunks are whitespace-stripped and empty
            chunks are dropped. A piece that is still too long after the last
            separator has been tried is returned as-is.
        """
        if separators is None:
            # Built per call so the default list is never shared or mutated.
            separators = ["\n\n", "\n", " ", ""]
        chunks = []

        def flush(buffer):
            # Emit the accumulated buffer, skipping whitespace-only results.
            cleaned = buffer.strip()
            if cleaned:
                chunks.append(cleaned)

        def split_recursive(text, level=0):
            if level >= len(separators) or len(text) <= self.max_chunk_size:
                chunks.append(text)
                return

            separator = separators[level]
            if separator == "":
                # str.split("") raises ValueError, so cut fixed-width slices.
                width = max(1, self.max_chunk_size)
                for start in range(0, len(text), width):
                    chunks.append(text[start:start + width])
                return

            current_chunk = ""
            for part in text.split(separator):
                if len(part) > self.max_chunk_size:
                    # This part cannot fit on its own: emit what we have and
                    # retry the part with the next, finer separator.
                    flush(current_chunk)
                    current_chunk = ""
                    split_recursive(part, level + 1)
                elif len(current_chunk) + len(part) + len(separator) <= self.max_chunk_size:
                    current_chunk += part + separator
                else:
                    flush(current_chunk)
                    current_chunk = part + separator

            flush(current_chunk)

        split_recursive(text)
        return chunks
Enter fullscreen mode Exit fullscreen mode

2.3 에이전트 기반 청킹 (Agentic Chunking)

class AgenticChunker:
    """Split heading-structured text into sections and cap each section's length."""

    def __init__(self, prompt_template):
        # Stored for callers; none of the methods in this class read it.
        self.prompt_template = prompt_template

    def chunk_with_agent(self, text, max_tokens=1000):
        """Return heading-delimited chunks no longer than ``max_tokens``.

        Length is measured with ``len()``, i.e. in characters, not in
        tokenizer tokens.

        Args:
            text: The text to chunk.
            max_tokens: Maximum length of a chunk, in characters.

        Returns:
            A list of chunk strings in document order.
        """
        chunks = []

        for section in self.divide_into_sections(text):
            if len(section) > max_tokens:
                chunks.extend(self.split_section(section, max_tokens))
            else:
                chunks.append(section)

        return chunks

    def divide_into_sections(self, text):
        """Split ``text`` at ``#``-style headings, dropping blank pieces."""
        import re

        # Each captured group is a heading plus everything up to (but not
        # including) the newline that precedes the next heading, or the end
        # of the text. Text before the first heading is kept as its own piece.
        sections = re.split(r'(#{1,6}\s+.*?)(?=\n#{1,6}|\Z)', text, flags=re.DOTALL)
        return [s for s in sections if s.strip()]

    def split_section(self, section, max_tokens):
        """Cut ``section`` into consecutive slices of at most ``max_tokens`` characters.

        Whitespace-only slices are dropped.

        Raises:
            ValueError: If ``max_tokens`` is not positive.
        """
        if max_tokens <= 0:
            raise ValueError("max_tokens must be a positive integer")
        slices = (
            section[start:start + max_tokens]
            for start in range(0, len(section), max_tokens)
        )
        return [piece for piece in slices if piece.strip()]
Enter fullscreen mode Exit fullscreen mode

3. 임베딩 모델 선택 및 비교

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class EmbeddingBenchmark:
    """Compare sentence-embedding models by query/text cosine similarity."""

    # Model identifiers loaded when the caller does not supply any.
    # NOTE(review): the third entry was "multi-qa-MiniLM-L6-v2", which does not
    # appear among the published checkpoints; "multi-qa-MiniLM-L6-cos-v1" is
    # the cosine-similarity variant — confirm against the model list.
    DEFAULT_MODEL_NAMES = (
        "all-MiniLM-L6-v2",
        "all-mpnet-base-v2",
        "multi-qa-MiniLM-L6-cos-v1",
    )

    def __init__(self, model_names=None):
        """Load one ``SentenceTransformer`` per model name.

        Args:
            model_names: Iterable of model identifiers. ``None`` loads
                ``DEFAULT_MODEL_NAMES``.
        """
        if model_names is None:
            model_names = self.DEFAULT_MODEL_NAMES
        self.models = {name: SentenceTransformer(name) for name in model_names}

    def compare_models(self, texts, queries):
        """Embed ``texts`` and ``queries`` with every model and summarise similarities.

        Returns:
            A dict keyed by model name. Each value holds ``mean_similarity``
            and ``std_similarity`` over the whole query-by-text similarity
            matrix, and ``top_k_similarities``: the maximum similarity per
            query (one value for each query).
        """
        results = {}

        for model_name, model in self.models.items():
            # Embed the texts and the queries.
            text_embeddings = model.encode(texts)
            query_embeddings = model.encode(queries)

            # Similarity matrix: rows follow queries, columns follow texts.
            similarities = cosine_similarity(query_embeddings, text_embeddings)
            results[model_name] = {
                "mean_similarity": np.mean(similarities),
                "std_similarity": np.std(similarities),
                "top_k_similarities": np.max(similarities, axis=1),
            }

        return results

# Usage example: construct the benchmark, compare its models on two sample
# texts and two sample queries, and print the per-model summary dict.
benchmark = EmbeddingBenchmark()
texts = ["Python은 인기 있는 프로그래밍 언어입니다.", "JavaScript는 웹 개발에 사용됩니다."]
queries = ["Python에 대해 설명해 주세요.", "웹 개발 언어는 무엇인가요?"]
results = benchmark.compare_models(texts, queries)
print(results)
Enter fullscreen mode Exit fullscreen mode

4. 벡터 데이터베이스 비교

4.1 Chroma (로컬)

import chromadb
from chromadb.utils import embedding_functions

class ChromaVectorDB:
    """Small wrapper around a single Chroma collection.

    Documents are stored as raw text, and the collection is created with
    Chroma's default embedding function.
    """

    def __init__(self, collection_name="rag_collection"):
        """Open (or create) the collection called ``collection_name``."""
        self.client = chromadb.Client()
        get_or_create = self.client.get_or_create_collection
        self.collection = get_or_create(
            name=collection_name,
            embedding_function=embedding_functions.DefaultEmbeddingFunction(),
        )

    def add_documents(self, documents, ids):
        """Add ``documents`` to the collection under the matching ``ids``."""
        payload = {"documents": documents, "ids": ids}
        self.collection.add(**payload)

    def search(self, query, k=5):
        """Return the documents Chroma reports for the text ``query``.

        Args:
            query: Query text (not an embedding).
            k: Number of results requested.
        """
        response = self.collection.query(query_texts=[query], n_results=k)
        # Only one query text was sent, so take the first result list.
        documents_per_query = response['documents']
        return documents_per_query[0]
Enter fullscreen mode Exit fullscreen mode

4.2 Qdrant

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Wrapper around a single Qdrant collection using cosine distance."""

    def __init__(self, host="localhost", port=6333,
                 collection_name="rag_collection", vector_size=384):
        """Connect to Qdrant and create the collection if it is missing.

        Args:
            host: Qdrant host name.
            port: Qdrant port.
            collection_name: Collection to use.
            vector_size: Dimensionality of the stored vectors; must match the
                embedding model that produces them.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

        if not self.client.collection_exists(collection_name=self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=vector_size, distance="Cosine")
            )

    def add_documents(self, documents, ids):
        """Upsert ``documents`` under the given ``ids``.

        Args:
            documents: Objects exposing ``.embedding`` and ``.content``.
            ids: One point id per document. ``None`` falls back to the
                positional index, which overwrites earlier batches that used
                the same fallback.
                NOTE(review): Qdrant is understood to accept only unsigned
                integers or UUIDs as point ids — confirm against its docs.

        Raises:
            ValueError: If ``ids`` and ``documents`` differ in length.
        """
        documents = list(documents)
        if ids is None:
            ids = range(len(documents))
        else:
            ids = list(ids)
            if len(ids) != len(documents):
                raise ValueError(
                    f"got {len(ids)} ids for {len(documents)} documents"
                )
        if not documents:
            # Nothing to send.
            return

        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                {
                    "id": point_id,
                    "vector": doc.embedding,
                    "payload": {"content": doc.content}
                } for point_id, doc in zip(ids, documents)
            ]
        )

    def search(self, query_vector, k=5):
        """Return the ``content`` payload of the ``k`` hits for ``query_vector``."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k
        )
        return [hit.payload['content'] for hit in results]
Enter fullscreen mode Exit fullscreen mode

4.3 pgvector (PostgreSQL 확장)

import psycopg2
import numpy as np

class PGVectorDB:
    """Store and search 384-dimensional embeddings in PostgreSQL via pgvector."""

    def __init__(self, connection_string):
        self.conn = psycopg2.connect(connection_string)
        self.setup_table()

    @staticmethod
    def _as_list(vector):
        """Return ``vector`` as a plain list (accepts NumPy arrays and sequences)."""
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    def setup_table(self):
        """Ensure the extension, the table and its cosine index exist."""
        try:
            with self.conn.cursor() as cur:
                # The VECTOR type only exists once the extension is enabled.
                cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS rag_documents (
                        id SERIAL PRIMARY KEY,
                        content TEXT,
                        embedding VECTOR(384)
                    )
                """)
                cur.execute("""
                    CREATE INDEX IF NOT EXISTS idx_embedding ON rag_documents
                    USING ivfflat (embedding vector_cosine_ops)
                """)
        except Exception:
            # Leave the connection usable instead of stuck in a failed transaction.
            self.conn.rollback()
            raise
        self.conn.commit()

    def add_documents(self, documents):
        """Insert ``documents`` (objects with ``.content`` and ``.embedding``) in one transaction."""
        try:
            with self.conn.cursor() as cur:
                for doc in documents:
                    cur.execute(
                        "INSERT INTO rag_documents (content, embedding) VALUES (%s, %s::vector)",
                        (doc.content, self._as_list(doc.embedding))
                    )
        except Exception:
            self.conn.rollback()
            raise
        self.conn.commit()

    def search(self, query_vector, k=5):
        """Return the ``content`` of the ``k`` rows nearest to ``query_vector``.

        Rows are ordered by the ``<=>`` operator, matching the
        ``vector_cosine_ops`` index created in ``setup_table``.
        """
        with self.conn.cursor() as cur:
            # psycopg2 sends a Python list as a SQL array; without the explicit
            # cast there is no ``vector <=> array`` operator to resolve.
            cur.execute("""
                SELECT content FROM rag_documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (self._as_list(query_vector), k))
            return [row[0] for row in cur.fetchall()]
Enter fullscreen mode Exit fullscreen mode

5. 완전한 RAG 파이프라인 구현


python
import os
from typing import List, Dict, Any
from

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)