DEV Community

matias yoon
matias yoon

Posted on

RAG 시스템 실전 구축 (v20)

RAG 시스템 실전 구축 (v20)

1. RAG 시스템 기본 개념

검색 증강 생성(Retrieval-Augmented Generation, RAG)은 대규모 언어 모델(LLM)에 외부 지식을 결합하는 아키텍처입니다. 이 시스템은 질문이 들어올 때마다 다음 세 단계를 순서대로 수행합니다:

  1. 검색 (Retrieval): 사용자 질문과 관련된 문서 조각들을 검색
  2. 증강 (Augmentation): 검색된 문서 조각을 질문과 함께 프롬프트에 넣어 맥락을 보강
  3. 생성 (Generation): LLM이 증강된 프롬프트를 기반으로 답변 생성
class RAGPipeline:
    """Minimal retrieve -> augment -> generate pipeline.

    The three collaborators are injected and only used through the calls
    made in ``process_query``: ``embedding_model.encode``,
    ``vector_db.search`` and ``llm.generate``.
    """

    def __init__(self, embedding_model, vector_db, llm):
        """Store the embedding model, vector store and LLM for later use."""
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        self.llm = llm

    def process_query(self, query):
        """Answer ``query`` using the five best-matching stored documents.

        Args:
            query: The user question; passed to the embedding model and
                interpolated verbatim into the prompt.

        Returns:
            Whatever ``llm.generate`` returns for the assembled prompt.
        """
        # Retrieval: embed the question and fetch the top-5 hits.
        hits = self.vector_db.search(self.embedding_model.encode(query), k=5)

        # Augmentation: one retrieved passage per line, followed by the question.
        passages = (hit['text'] for hit in hits)
        context = "\n".join(passages)
        prompt = f"Context: {context}\n\nQuestion: {query}"

        # Generation.
        return self.llm.generate(prompt)
Enter fullscreen mode Exit fullscreen mode

2. 문서 청킹 전략

2.1 의미적 청킹 (Semantic Chunking)

의미 단위로 문서를 분할하여 의미적 관련성을 유지합니다.

import numpy as np
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer

class SemanticChunker:
    """Group consecutive sentences that fall into the same KMeans cluster."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def chunk_by_semantic(self, text, chunk_size=512):
        """Split ``text`` into chunks of adjacent, same-cluster sentences.

        Sentences are obtained with ``text.split('. ')``, embedded, and
        clustered with KMeans (at most 10 clusters, never more clusters than
        sentences). A new chunk starts whenever the cluster label changes
        between two neighbouring sentences.

        Args:
            text: Document text to split.
            chunk_size: Kept for interface compatibility; the current
                implementation does not use it to bound chunk length.

        Returns:
            A list of chunk strings, each made of sentences re-joined with
            ``'. '``. Empty or whitespace-only input yields ``[]``.
        """
        # Nothing to embed or cluster: avoid producing a single '' chunk.
        if not text or not text.strip():
            return []

        sentences = text.split('. ')
        embeddings = self.model.encode(sentences)

        # Semantic grouping via clustering.
        kmeans = KMeans(n_clusters=min(len(sentences), 10))
        kmeans.fit(embeddings)
        labels = kmeans.labels_

        chunks = []
        current_chunk = []
        current_label = labels[0]

        for sentence, label in zip(sentences, labels):
            # A label change closes the running chunk.
            if label != current_label and current_chunk:
                chunks.append('. '.join(current_chunk))
                current_chunk = []
                current_label = label

            current_chunk.append(sentence)

        if current_chunk:
            chunks.append('. '.join(current_chunk))

        return chunks
Enter fullscreen mode Exit fullscreen mode

2.2 재귀적 청킹 (Recursive Chunking)

문서를 반복적으로 더 작은 단위로 분할하여 원하는 청크 크기에 맞춥니다. 아래 예제는 이를 단순화하여, 고정 크기 윈도우를 일정 길이만큼 겹치게(overlap) 이동시키며 분할합니다.

class RecursiveChunker:
    """Split text into fixed-size character windows that overlap."""

    def __init__(self, chunk_size=1024, overlap=128):
        """Store the window configuration.

        Args:
            chunk_size: Maximum number of characters per chunk.
            overlap: Number of characters shared by two consecutive chunks.
        """
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Return overlapping slices of ``text``.

        Each window starts ``chunk_size - overlap`` characters after the
        previous one. The last window ends at the end of ``text`` and may be
        shorter than ``chunk_size``.

        Args:
            text: The string to split.

        Returns:
            A list of substrings; ``[]`` for empty input.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is
                not in ``[0, chunk_size)``. Such settings cannot advance the
                window and would otherwise loop forever.
        """
        if self.chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        chunks = []
        text_length = len(text)
        step = self.chunk_size - self.overlap
        start = 0

        while start < text_length:
            end = min(start + self.chunk_size, text_length)
            chunks.append(text[start:end])
            # Stop once the window reaches the end. Stepping back by
            # `overlap` from here would re-emit the tail forever.
            if end == text_length:
                break
            start += step

        return chunks
Enter fullscreen mode Exit fullscreen mode

2.3 에이전트 기반 청킹 (Agentic Chunking)

문서 구조를 고려하여 의미 있는 단위로 청킹합니다.

class AgenticChunker:
    """Pack blank-line-separated paragraphs into size-bounded chunks."""

    def __init__(self):
        """Define the heading patterns kept on the instance."""
        # Rules for recognising document structure (Markdown headings).
        # NOTE: chunk_by_structure does not consult these patterns; it
        # splits on blank lines only.
        self.section_patterns = [
            r'##\s+(.+)',  # level-2 heading
            r'#\s+(.+)',   # level-1 heading
            r'###\s+(.+)', # level-3 heading
        ]

    def chunk_by_structure(self, text, max_chunk_size=1000):
        """Group paragraphs into chunks of roughly ``max_chunk_size`` characters.

        Paragraphs are the pieces of ``text.split('\\n\\n')``. They are
        appended to the current chunk until adding the next one would push
        the summed paragraph lengths above ``max_chunk_size``; then the
        chunk is closed and a new one starts. Separator characters are not
        counted, and a single paragraph longer than the limit is emitted as
        its own chunk rather than being split.

        Args:
            text: Document text to split.
            max_chunk_size: Character budget per chunk (default 1000).

        Returns:
            A list of chunk strings with paragraphs re-joined by a blank line.
        """
        chunks = []
        current_chunk = []
        chunk_size = 0

        for para in text.split('\n\n'):
            para_len = len(para)
            # Close the running chunk only if it already holds something,
            # so an oversized paragraph never yields an empty chunk.
            if current_chunk and chunk_size + para_len > max_chunk_size:
                chunks.append('\n\n'.join(current_chunk))
                current_chunk = []
                chunk_size = 0

            current_chunk.append(para)
            chunk_size += para_len

        if current_chunk:
            chunks.append('\n\n'.join(current_chunk))

        return chunks
Enter fullscreen mode Exit fullscreen mode

3. 임베딩 모델 선택 및 비교

3.1 모델 비교 테스트

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import time

class EmbeddingBenchmark:
    """Compare several sentence-embedding models on latency and similarity."""

    def __init__(self):
        """Instantiate every model that will be benchmarked."""
        self.models = {
            'all-MiniLM-L6-v2': SentenceTransformer('all-MiniLM-L6-v2'),
            'all-mpnet-base-v2': SentenceTransformer('all-mpnet-base-v2'),
            'sentence-t5-xxl': SentenceTransformer('sentence-t5-xxl')
        }

    def benchmark_models(self, test_queries, test_documents):
        """Encode the queries and documents with each model and time it.

        Args:
            test_queries: Query strings to embed.
            test_documents: Document strings to embed.

        Returns:
            A dict keyed by model name. Each value holds:
            ``'latency'`` (seconds spent on both encodes plus the similarity
            computation), ``'similarity_matrix'`` (the result of
            ``cosine_similarity(query_embeddings, doc_embeddings)``) and
            ``'size'`` (the model's embedding dimension).
        """
        results = {}

        for name, model in self.models.items():
            # perf_counter is monotonic, so the interval cannot be skewed by
            # wall-clock adjustments.
            start_time = time.perf_counter()

            # Embedding generation.
            query_embeddings = model.encode(test_queries)
            doc_embeddings = model.encode(test_documents)

            # Similarity computation.
            similarities = cosine_similarity(query_embeddings, doc_embeddings)

            end_time = time.perf_counter()

            results[name] = {
                'latency': end_time - start_time,
                'similarity_matrix': similarities,
                # Ask the model for its output width directly instead of
                # running feature extraction on a None input.
                'size': model.get_sentence_embedding_dimension()
            }

        return results

# Usage example: run the benchmark on a tiny query/document set.
# Constructing EmbeddingBenchmark instantiates every model in its `models` dict.
benchmark = EmbeddingBenchmark()
test_queries = ["Python 언어의 장점", "AI 기술 발전"]
test_docs = ["Python은 간단하고 읽기 쉬운 언어입니다", "AI는 인공지능 기술입니다"]

# `results` maps each model name to its 'latency', 'similarity_matrix' and 'size'.
results = benchmark.benchmark_models(test_queries, test_docs)
print("모델 성능 비교:")
for model, metrics in results.items():
    # One line per model: latency with two decimals, then the reported size.
    print(f"{model}: {metrics['latency']:.2f}초, 크기: {metrics['size']}")
Enter fullscreen mode Exit fullscreen mode

4. 벡터 데이터베이스 비교

4.1 Chroma vs Qdrant vs pgvector vs Milvus


python
# Chroma
import chromadb
from chromadb.config import Settings

class ChromaVectorDB:
    """Thin wrapper around a single Chroma collection named ``rag_collection``."""

    def __init__(self):
        """Create a Chroma client and open (or create) the collection."""
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection("rag_collection")

    def add_documents(self, documents, embeddings):
        """Add documents with their precomputed embeddings.

        Ids are consecutive integers rendered as strings. They continue from
        the number of items already in the collection, so a second batch
        does not reuse the ids of the first.

        Args:
            documents: Sequence of document strings.
            embeddings: Embeddings aligned one-to-one with ``documents``.
        """
        batch_size = len(documents)
        if batch_size == 0:
            # Nothing to store; skip the round trip to the collection.
            return

        first_id = self.collection.count()
        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=[str(i) for i in range(first_id, first_id + batch_size)],
        )

    def search(self, query_embedding, k=5):
        """Return up to ``k`` nearest documents for one query embedding.

        Args:
            query_embedding: Embedding of the query.
            k: Number of results requested from the collection.

        Returns:
            A list of ``{"text": ..., "score": ...}`` dicts. ``score`` is the
            value Chroma reports under ``'distances'``.
        """
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k
        )
        # A single query was sent, so only the first result row is relevant.
        return [{"text": doc, "score": score}
                for doc, score in zip(results['documents'][0], results['distances'][0])]

# Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Thin wrapper around an in-memory Qdrant collection named ``rag_collection``."""

    def __init__(self, vector_size=384):
        """Start an in-memory Qdrant instance and (re)create the collection.

        Args:
            vector_size: Dimension of the stored vectors. Defaults to 384,
                the value previously hard-coded here.
        """
        # Imported locally because the file-level import does not bring
        # these model classes into scope.
        from qdrant_client.models import Distance, VectorParams

        self.client = QdrantClient(":memory:")  # in-memory storage
        self.collection_name = "rag_collection"
        # Next free point id; keeps ids unique across add_documents calls.
        self._next_id = 0

        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
        )

    def add_documents(self, documents, embeddings):
        """Upsert documents with their precomputed embeddings.

        Point ids continue from the last id used by this instance, so a
        later batch does not overwrite an earlier one.

        Args:
            documents: Sequence of document strings.
            embeddings: Vectors aligned one-to-one with ``documents``. Objects
                exposing ``tolist()`` are converted to plain lists.
        """
        from qdrant_client.models import PointStruct

        first_id = getattr(self, "_next_id", 0)
        points = [
            PointStruct(
                id=first_id + offset,
                vector=embedding.tolist() if hasattr(embedding, "tolist") else list(embedding),
                payload={"text": doc},
            )
            for offset, (doc, embedding) in enumerate(zip(documents, embeddings))
        ]
        if not points:
            return

        self.client.upsert(collection_name=self.collection_name, points=points)
        self._next_id = first_id + len(points)

    def search(self, query_embedding, k=5):
        """Return up to ``k`` best-scoring documents for one query embedding.

        Args:
            query_embedding: Embedding of the query.
            k: Maximum number of hits requested.

        Returns:
            A list of ``{"text": ..., "score": ...}`` dicts built from each
            hit's payload text and score.
        """
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=k
        )
        return [{"text": hit.payload['text'], "score": hit.score}
                for hit in results]

# pgvector
import psycopg2
import numpy as np

class PGVectorDB:
    """Store documents and their embeddings in a PostgreSQL table using pgvector."""

    def __init__(self, connection_string):
        """Connect to PostgreSQL and make sure the table and index exist.

        Args:
            connection_string: Passed unchanged to ``psycopg2.connect``.
        """
        self.conn = psycopg2.connect(connection_string)
        self.create_table()

    def create_table(self):
        """Create the ``rag_documents`` table and its ivfflat cosine index if absent."""
        with self.conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE IF NOT EXISTS rag_documents (
                    id SERIAL PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
            cur.execute("""
                CREATE INDEX IF NOT EXISTS embedding_idx 
                ON rag_documents USING ivfflat (embedding vector_cosine_ops)
            """)
        self.conn.commit()

    def add_documents(self, documents, embeddings):
        """Insert documents with their embeddings and commit the transaction.

        Args:
            documents: Sequence of document strings.
            embeddings: Vectors aligned one-to-one with ``documents``. Objects
                exposing ``tolist()`` are converted; other iterables are
                copied into a list.

        Raises:
            Exception: Any error raised while preparing or inserting a row is
                re-raised after the transaction has been rolled back.
        """
        try:
            with self.conn.cursor() as cur:
                for doc, embedding in zip(documents, embeddings):
                    vector = embedding.tolist() if hasattr(embedding, "tolist") else list(embedding)
                    cur.execute(
                        "INSERT INTO rag_documents (content, embedding) VALUES (%s, %s)",
                        (doc, vector)
                    )
        except Exception:
            # Leave the connection usable instead of stuck in a failed transaction.
            self.conn.rollback()
            raise
        # Without a commit the inserted rows are never persisted.
        self.conn.commit()

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)