RAG 시스템 실전 구축 (v6)
1. RAG 기초 개념
Retrieval-Augmented Generation (RAG)은 대규모 언어 모델(LLM)의 지식 범위를 확장하는 효율적인 방법입니다. RAG는 다음 세 가지 주요 단계로 구성됩니다:
- 검색 (Retrieval): 질문과 관련된 문서 조각을 벡터 데이터베이스에서 검색합니다.
- 증강 (Augmentation): 검색된 문서를 프롬프트에 포함하여 LLM의 입력을 향상시킵니다.
- 생성 (Generation): 증강된 입력을 기반으로 답변을 생성합니다.
# 간단한 RAG 루프 구현
class SimpleRAG:
    """Minimal retrieve-augment-generate loop.

    Args:
        embedding_model: Object exposing ``encode(text)``. Its return value is
            handed unchanged to ``vector_db.search``.
        vector_db: Object exposing ``search(query_embedding, k=...)`` and
            returning an iterable of documents. Each document may be a plain
            string or an object carrying a ``content`` attribute.
        generator: Optional callable ``prompt -> answer`` used by
            ``generate_answer``. When omitted, ``generate_answer`` must be
            overridden in a subclass.
    """

    def __init__(self, embedding_model, vector_db, generator=None):
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        self.generator = generator

    def query(self, question, k=5):
        """Answer ``question`` using the ``k`` most relevant documents.

        Args:
            question: The user question as a string.
            k: Number of documents requested from the vector store.

        Returns:
            Whatever ``generate_answer`` returns for the assembled prompt.

        Raises:
            NotImplementedError: If no generator was configured and
                ``generate_answer`` was not overridden.
        """
        # 1. Embed the question.
        query_embedding = self.embedding_model.encode(question)
        # 2. Retrieve candidate documents.
        relevant_docs = self.vector_db.search(query_embedding, k=k)
        # 3. Augment the prompt and generate.
        # Stores may return raw strings or objects with ``.content``;
        # accept both so the loop works with either kind of backend.
        context = "\n".join(
            getattr(doc, "content", doc) for doc in relevant_docs
        )
        prompt = f"질문: {question}\n문맥: {context}"
        return self.generate_answer(prompt)

    def generate_answer(self, prompt):
        """Produce an answer for ``prompt`` via the configured generator.

        Raises:
            NotImplementedError: If no generator callable was supplied.
        """
        if self.generator is None:
            raise NotImplementedError(
                "SimpleRAG needs a `generator` callable or a subclass that "
                "overrides generate_answer()."
            )
        return self.generator(prompt)
2. 청킹 전략
2.1 의미적 청킹 (Semantic Chunking)
문장 임베딩 간 유사도를 기준으로 의미가 가까운 인접 문장을 하나의 청크로 묶어, 의미 단위가 중간에 끊기지 않도록 청킹합니다.
from sentence_transformers import SentenceTransformer
import numpy as np
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def chunk_semantic(self, text, max_tokens=512,
                       similarity_threshold=0.8, max_sentences=10):
        """Split ``text`` into chunks of semantically similar sentences.

        Sentences are obtained by splitting on ``'. '``. Each sentence is
        compared with the embedding of the FIRST sentence of the chunk being
        built; it joins that chunk when the cosine similarity exceeds
        ``similarity_threshold`` and the chunk still has room.

        Args:
            text: Input text.
            max_tokens: Upper bound on a chunk's size, measured as the number
                of whitespace-separated words (an approximation of model
                tokens). A single sentence longer than the limit is kept
                whole as its own chunk.
            similarity_threshold: Minimum cosine similarity (exclusive)
                required to extend the current chunk.
            max_sentences: Maximum number of sentences per chunk.

        Returns:
            A list of chunk strings; empty when ``text`` has no sentences.
        """
        # Skip blank fragments so the model is never asked to embed "".
        sentences = [s for s in text.split('. ') if s.strip()]
        if not sentences:
            return []

        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = []
        current_embedding = None
        current_tokens = 0

        for sentence, embedding in zip(sentences, embeddings):
            sentence_tokens = len(sentence.split())
            if current_chunk:
                has_room = (
                    len(current_chunk) < max_sentences
                    and current_tokens + sentence_tokens <= max_tokens
                )
                if has_room and (
                    self._cosine(current_embedding, embedding)
                    > similarity_threshold
                ):
                    current_chunk.append(sentence)
                    current_tokens += sentence_tokens
                    continue
                chunks.append('. '.join(current_chunk))
            # Start a new chunk anchored on this sentence's embedding.
            current_chunk = [sentence]
            current_embedding = embedding
            current_tokens = sentence_tokens

        chunks.append('. '.join(current_chunk))
        return chunks

    @staticmethod
    def _cosine(a, b):
        """Return the cosine similarity of ``a`` and ``b`` (0.0 if either is a zero vector)."""
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return float(np.dot(a, b) / denom)
2.2 재귀적 청킹 (Recursive Chunking)
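재귀적 청킹은 문서를 여러 레벨(예: 문단 → 문장 → 단어)로 단계적으로 분할하여 다양한 청킹 수준을 제공합니다. 아래 예제는 이를 단순화하여, 고정 길이(chunk_size) 창을 overlap만큼 겹치게 이동시키며 텍스트를 분할합니다.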
class RecursiveChunker:
    """Fixed-size character chunker with overlap between neighbours.

    Despite the name, the implementation is a sliding window: every chunk is
    at most ``chunk_size`` characters and shares ``overlap`` characters with
    the chunk before it.

    Args:
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size`` so the window always advances.

    Raises:
        ValueError: If the arguments violate the constraints above.
    """

    def __init__(self, chunk_size=1024, overlap=128):
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Split ``text`` into overlapping chunks.

        Returns:
            ``[text]`` when the text fits in one chunk; otherwise the list of
            windows in order. The final window ends exactly at the end of the
            text and may be shorter than ``chunk_size``.
        """
        if len(text) <= self.chunk_size:
            return [text]

        # Advancing by (chunk_size - overlap) yields exactly `overlap` shared
        # characters and guarantees termination.
        step = self.chunk_size - self.overlap
        chunks = []
        for start in range(0, len(text), step):
            end = start + self.chunk_size
            chunks.append(text[start:end])
            if end >= len(text):
                # This window already reached the end; a further window would
                # only repeat the tail.
                break
        return chunks
2.3 에이전트 기반 청킹 (Agentic Chunking)
문서의 구조적 특징(예: Markdown 헤더)을 고려한 청킹입니다. 아래 예제는 `#`, `##` 헤더를 경계로 삼아 섹션 단위의 청크를 만듭니다.
import re
class AgenticChunker:
    """Split Markdown-like text into one chunk per ``#`` / ``##`` section."""

    # Capturing group keeps the header markers in the split result, so they
    # land at the odd indices of the returned list.
    _HEADER_SPLIT = re.compile(r'(\n## |\n# )')

    def __init__(self):
        # Kept for callers that read this attribute; chunk_by_structure does
        # not use it.
        self.section_patterns = [
            r'##\s+(.+)',
            r'#\s+(.+)',
            r'\*\*\s+(.+)\s+\*\*'
        ]

    def chunk_by_structure(self, text):
        """Return the sections of ``text`` as a list of stripped strings.

        A new section starts at every line beginning with ``"# "`` or
        ``"## "`` that is preceded by a newline. Each chunk includes its own
        header line. Sections that are empty or whitespace-only are dropped.

        Args:
            text: The document to split.

        Returns:
            List of non-empty chunk strings in document order.
        """
        chunks = []
        current = ""

        for index, part in enumerate(self._HEADER_SPLIT.split(text)):
            if index % 2 == 1:
                # Header marker: close the section collected so far.
                self._flush(current, chunks)
                current = part
            else:
                # Section body (or the text before the first header).
                current += part

        self._flush(current, chunks)
        return chunks

    @staticmethod
    def _flush(section, chunks):
        """Append the stripped ``section`` to ``chunks`` unless it is empty."""
        stripped = section.strip()
        if stripped:
            chunks.append(stripped)
3. 임베딩 모델 선택 및 비교
3.1 모델 비교
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
class EmbeddingBenchmark:
    """Compare embedding models on encode latency, accuracy and vector size.

    Args:
        models: Optional mapping ``name -> model``. Every model must expose
            ``encode(texts)`` and ``get_sentence_embedding_dimension()``.
            When omitted, the models in ``DEFAULT_MODEL_NAMES`` are loaded
            with ``SentenceTransformer``.
        evaluator: Optional callable ``(embeddings, benchmark_dataset) ->
            accuracy`` used by ``evaluate_accuracy``.
    """

    DEFAULT_MODEL_NAMES = (
        "all-MiniLM-L6-v2",
        "all-mpnet-base-v2",
        "sentence-t5-xxl",
    )

    def __init__(self, models=None, evaluator=None):
        if models is None:
            models = {
                name: SentenceTransformer(name)
                for name in self.DEFAULT_MODEL_NAMES
            }
        self.models = models
        self.evaluator = evaluator

    def compare_models(self, texts, benchmark_dataset):
        """Encode ``texts`` with every model and collect metrics.

        Args:
            texts: Texts passed to each model's ``encode``.
            benchmark_dataset: Passed through to ``evaluate_accuracy``.

        Returns:
            Dict keyed by model name. Each value has ``"latency"`` (seconds
            spent in ``encode``), ``"accuracy"`` (result of
            ``evaluate_accuracy``) and ``"size"`` (the value of
            ``get_sentence_embedding_dimension()``).

        Raises:
            NotImplementedError: If no evaluator is configured and
                ``evaluate_accuracy`` is not overridden.
        """
        # Imported here because the module does not import ``time`` before
        # this class is defined.
        import time

        results = {}
        for model_name, model in self.models.items():
            # perf_counter is monotonic, so the interval cannot be skewed by
            # wall-clock adjustments.
            started = time.perf_counter()
            embeddings = model.encode(texts)
            latency = time.perf_counter() - started

            results[model_name] = {
                "latency": latency,
                "accuracy": self.evaluate_accuracy(
                    embeddings, benchmark_dataset
                ),
                "size": model.get_sentence_embedding_dimension(),
            }
        return results

    def evaluate_accuracy(self, embeddings, benchmark_dataset):
        """Score ``embeddings`` against ``benchmark_dataset``.

        Delegates to the ``evaluator`` callable given at construction time.

        Raises:
            NotImplementedError: If no evaluator was supplied.
        """
        if self.evaluator is None:
            raise NotImplementedError(
                "EmbeddingBenchmark needs an `evaluator` callable or a "
                "subclass that overrides evaluate_accuracy()."
            )
        return self.evaluator(embeddings, benchmark_dataset)
# Usage example
# NOTE: constructing EmbeddingBenchmark with no arguments loads every model
# named in its __init__.
benchmark = EmbeddingBenchmark()
texts = ["Python은 강력한 프로그래밍 언어입니다.", "Machine Learning은 AI의 한 분야입니다."]
# NOTE(review): the second argument is the literal string "benchmark_dataset",
# a placeholder - confirm what evaluate_accuracy expects before running this.
results = benchmark.compare_models(texts, "benchmark_dataset")
4. 벡터 데이터베이스 비교
4.1 Chroma vs Qdrant vs pgvector vs Milvus
# Chroma 구현
import chromadb
from chromadb import Client
class ChromaVectorDB:
    """Thin wrapper around a Chroma collection named ``rag_docs``."""

    def __init__(self):
        self.client = Client()
        self.collection = self.client.get_or_create_collection("rag_docs")

    @staticmethod
    def _as_list(embedding):
        """Return ``embedding`` as a plain list (NumPy arrays are converted)."""
        if hasattr(embedding, "tolist"):
            return embedding.tolist()
        return list(embedding)

    def add_documents(self, documents, embeddings):
        """Store ``documents`` together with their ``embeddings``.

        Ids continue from the collection's current size, so repeated calls
        do not reuse ``doc_0``, ``doc_1``, ... from an earlier batch.

        Args:
            documents: Sequence of document strings.
            embeddings: Sequence of vectors, one per document.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        documents = list(documents)
        vectors = [self._as_list(embedding) for embedding in embeddings]
        if len(documents) != len(vectors):
            raise ValueError(
                "documents and embeddings must have the same length"
            )
        if not documents:
            # Nothing to store; avoid sending an empty batch.
            return

        offset = self.collection.count()
        self.collection.add(
            documents=documents,
            embeddings=vectors,
            ids=[f"doc_{offset + i}" for i in range(len(documents))],
        )

    def search(self, query_embedding, k=5):
        """Return the documents of the ``k`` nearest neighbours.

        Args:
            query_embedding: A single query vector.
            k: Number of results requested.

        Returns:
            The list of document strings for the single query.
        """
        results = self.collection.query(
            query_embeddings=[self._as_list(query_embedding)],
            n_results=k,
        )
        # One query was sent, so the first (only) result row is returned.
        return results["documents"][0]
# Qdrant 구현
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Thin wrapper around a Qdrant collection named ``rag_docs``.

    The collection is not created here; it must already exist on the
    server at ``localhost:6333``.
    """

    def __init__(self):
        self.client = QdrantClient(host="localhost", port=6333)
        self.collection_name = "rag_docs"

    @staticmethod
    def _as_list(embedding):
        """Return ``embedding`` as a plain list (NumPy arrays are converted)."""
        if hasattr(embedding, "tolist"):
            return embedding.tolist()
        return list(embedding)

    def add_documents(self, documents, embeddings):
        """Upsert ``documents`` with their ``embeddings``.

        Point ids continue from the collection's current point count, so a
        later batch does not overwrite points 0..n-1 of an earlier one.

        Args:
            documents: Sequence of document strings.
            embeddings: Sequence of vectors, one per document.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        documents = list(documents)
        vectors = [self._as_list(embedding) for embedding in embeddings]
        if len(documents) != len(vectors):
            raise ValueError(
                "documents and embeddings must have the same length"
            )
        if not documents:
            # Nothing to store; avoid sending an empty batch.
            return

        offset = self.client.count(
            collection_name=self.collection_name,
            exact=True,
        ).count
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                {
                    "id": offset + i,
                    "vector": vector,
                    "payload": {"text": doc},
                }
                for i, (doc, vector) in enumerate(zip(documents, vectors))
            ],
        )

    def search(self, query_embedding, k=5):
        """Return the stored text of the ``k`` nearest points.

        Args:
            query_embedding: A single query vector.
            k: Maximum number of hits.

        Returns:
            List with the ``"text"`` payload of each hit, in the order the
            client returned them.
        """
        hits = self.client.search(
            collection_name=self.collection_name,
            query_vector=self._as_list(query_embedding),
            limit=k,
        )
        return [hit.payload["text"] for hit in hits]
# pgvector 구현
import psycopg2
from psycopg2.extras import Json
class PgVectorDB:
    """Document store backed by PostgreSQL with the pgvector extension.

    Args:
        connection_string: Passed to ``psycopg2.connect``.
        dimension: Width of the ``embedding`` column created by
            ``create_table``. It must match the embedding model in use.
    """

    def __init__(self, connection_string, dimension=768):
        self.conn = psycopg2.connect(connection_string)
        self.dimension = dimension

    @staticmethod
    def _to_vector_literal(embedding):
        """Format ``embedding`` as pgvector text input, e.g. ``"[1.0,2.5]"``.

        Sending text plus an explicit ``::vector`` cast works for lists and
        NumPy arrays alike, and avoids relying on array-to-vector casts.
        """
        return "[" + ",".join(str(float(value)) for value in embedding) + "]"

    def create_table(self):
        """Create the extension and the ``rag_documents`` table if missing."""
        # int() guarantees only a number is interpolated into the DDL; a
        # column type modifier cannot be sent as a bound parameter.
        dimension = int(self.dimension)
        with self.conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
            cur.execute(f"""
                CREATE TABLE IF NOT EXISTS rag_documents (
                    id SERIAL PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR({dimension})
                )
            """)
        self.conn.commit()

    def add_documents(self, documents, embeddings):
        """Insert ``documents`` with their ``embeddings`` and commit.

        Args:
            documents: Sequence of document strings.
            embeddings: Sequence of vectors, one per document.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        documents = list(documents)
        vectors = [
            self._to_vector_literal(embedding) for embedding in embeddings
        ]
        if len(documents) != len(vectors):
            raise ValueError(
                "documents and embeddings must have the same length"
            )
        if not documents:
            return

        with self.conn.cursor() as cur:
            for doc, vector in zip(documents, vectors):
                cur.execute(
                    "INSERT INTO rag_documents (content, embedding) "
                    "VALUES (%s, %s::vector)",
                    (doc, vector),
                )
        self.conn.commit()

    def search(self, query_embedding, k=5):
        """Return the content of the ``k`` best-matching rows.

        Rows are ordered by pgvector's ``<#>`` operator (negative inner
        product), so ascending order puts the largest inner product first.

        Args:
            query_embedding: A single query vector.
            k: Maximum number of rows.

        Returns:
            List of ``content`` strings.
        """
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT content FROM rag_documents
                ORDER BY embedding <#> %s::vector
                LIMIT %s
            """, (self._to_vector_literal(query_embedding), k))
            return [row[0] for row in cur.fetchall()]
4.2 성능 비교
python
import time
def benchmark_vector_dbs(documents, embeddings, queries):
dbs = {
"Chroma": ChromaVectorDB(),
"Qdrant": QdrantVectorDB(),
"pgvector": PgVectorDB("postgresql://user:pass@
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)