RAG 시스템 실전 구축 (v2)
1. RAG 시스템 개요
RAG (Retrieval-Augmented Generation) 시스템은 정보 검색과 생성을 결합하여 정확한 답변을 제공하는 아키텍처입니다. 다음 세 단계로 구성됩니다:
- Retrieval: 사용자 질문과 유사한 문서 검색
- Augmentation: 검색된 문서를 프롬프트에 추가
- Generation: LLM이 검색된 정보를 기반으로 답변 생성
# RAG 프로세스 흐름
class RAGPipeline:
    """Minimal retrieve -> augment -> generate pipeline.

    The retriever must expose ``retrieve(query)`` returning an iterable of
    objects that carry a ``content`` attribute; the generator must expose
    ``generate(prompt)``.
    """

    def __init__(self, retriever, generator):
        """Keep references to the retriever and generator collaborators."""
        self.retriever = retriever
        self.generator = generator

    def process_query(self, query):
        """Answer *query* using retrieved documents as context.

        Returns whatever ``generator.generate`` produces for the augmented
        prompt.
        """
        # Retrieval feeds augmentation, which in turn feeds generation.
        docs = self.retriever.retrieve(query)
        prompt = self._augment_prompt(query, docs)
        return self.generator.generate(prompt)

    def _augment_prompt(self, query, docs):
        """Build the prompt: the question followed by the document contents."""
        # Document bodies are separated from one another by a blank line.
        context = "\n\n".join(doc.content for doc in docs)
        return f"질문: {query}\n\n참조 문서:\n{context}"
2. Chunking 전략
문서를 적절한 크기로 분할하는 것이 중요합니다. 다음 두 가지 전략을 비교합니다:
2.1 Semantic Chunking
의미 기반으로 문단을 나누어 의미 단위를 유지합니다.
from sentence_transformers import SentenceTransformer
import numpy as np
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity.

    A chunk grows while each new sentence stays similar to the chunk's
    *first* sentence (the anchor); a dissimilar sentence starts a new chunk
    and becomes the new anchor.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-transformers model named *model_name*."""
        self.model = SentenceTransformer(model_name)

    def chunk_documents(self, text, threshold=0.75):
        """Split *text* into semantically coherent chunks.

        Args:
            text: Input text; sentences are taken to be separated by ``'. '``.
            threshold: A sentence joins the current chunk only when its
                cosine similarity to the chunk's anchor is strictly greater
                than this value.

        Returns:
            A list of chunk strings (sentences re-joined with ``'. '``).
            Empty or whitespace-only input yields ``[]`` without calling the
            model.
        """
        if not text or not text.strip():
            return []

        sentences = text.split('. ')
        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = []
        anchor = None
        for sentence, emb in zip(sentences, embeddings):
            if anchor is not None and self._is_similar(anchor, emb, threshold):
                current_chunk.append(sentence)
                continue
            # First sentence or a topic change: close the running chunk (if
            # any) and start a new one anchored on this sentence.
            if current_chunk:
                chunks.append('. '.join(current_chunk))
            current_chunk = [sentence]
            anchor = emb

        if current_chunk:
            chunks.append('. '.join(current_chunk))
        return chunks

    @staticmethod
    def _is_similar(anchor, candidate, threshold):
        """Return True when cosine(anchor, candidate) exceeds *threshold*."""
        denom = np.linalg.norm(anchor) * np.linalg.norm(candidate)
        if denom == 0:
            # Cosine similarity is undefined for a zero vector; treat it as
            # "not similar" instead of dividing by zero.
            return False
        return np.dot(anchor, candidate) / denom > threshold
2.2 Recursive Chunking
문서를 여러 수준으로 재귀적으로 분할합니다.
class RecursiveChunker:
    """Split text by repeatedly halving it near a space character.

    ``chunk_size`` is the target maximum chunk length in characters.
    ``overlap`` is the half-width of the window around the midpoint in which
    a space is searched for as the split position (chunks do not overlap).
    """

    def __init__(self, chunk_size=512, overlap=50):
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text, max_depth=3):
        """Return the chunks of *text* in their original order.

        Concatenating the returned chunks reproduces *text*. Chunks are at
        most ``chunk_size`` long unless ``max_depth`` halvings are not enough
        to get there.
        """
        chunks = []
        self._recursive_split(text, chunks, 0, max_depth)
        return chunks

    def _recursive_split(self, text, chunks, depth, max_depth):
        """Append *text*, or the recursive split of both its halves, to *chunks*."""
        # A single character cannot be halved, whatever chunk_size says.
        if len(text) <= self.chunk_size or depth >= max_depth or len(text) < 2:
            chunks.append(text)
            return

        mid = len(text) // 2
        # Clamp the window start: a negative start would be interpreted by
        # rfind as an offset from the end of the string.
        window_start = max(0, mid - self.overlap)
        split_point = text.rfind(' ', window_start, mid + self.overlap)
        if split_point <= 0:
            # No usable space (or one at index 0, which would produce an
            # empty left half): fall back to a hard split at the midpoint.
            split_point = mid

        # Both halves are re-checked against chunk_size; appending the left
        # half unconditionally would let it exceed the limit.
        self._recursive_split(text[:split_point], chunks, depth + 1, max_depth)
        self._recursive_split(text[split_point:], chunks, depth + 1, max_depth)
3. 임베딩 모델 선택
모델 성능과 속도를 고려하여 적절한 모델을 선택합니다:
from sentence_transformers import SentenceTransformer
import torch
import time
class EmbeddingBenchmark:
    """Time ``encode`` for a fixed set of sentence-transformers models."""

    def __init__(self):
        # Model name -> short human-readable description.
        self.models = {
            'all-MiniLM-L6-v2': 'small, fast',
            'all-mpnet-base-v2': 'medium, balanced',
            'sentence-t5-xxl': 'large, high quality',
        }

    def benchmark_models(self, texts):
        """Encode *texts* with every configured model and report timings.

        Args:
            texts: Non-empty sequence of strings to encode.

        Returns:
            Dict keyed by model name; each value holds ``'time'`` (seconds
            spent in ``encode``), ``'size'`` (length of the first embedding)
            and ``'description'``.

        Raises:
            ValueError: If *texts* is empty. Checked up front so no model is
                loaded only to fail later on ``embeddings[0]``.
        """
        if len(texts) == 0:
            raise ValueError("texts must contain at least one string")

        results = {}
        for model_name, description in self.models.items():
            # Model construction stays outside the timed region.
            model = SentenceTransformer(model_name)

            # perf_counter is monotonic and high-resolution, unlike the
            # wall clock, which can jump while a measurement is running.
            started = time.perf_counter()
            embeddings = model.encode(texts)
            elapsed = time.perf_counter() - started

            results[model_name] = {
                'time': elapsed,
                'size': len(embeddings[0]),
                'description': description,
            }
        return results
# Usage example: run the benchmark on three short Korean sample strings and
# print the per-model result dict (keys: 'time', 'size', 'description').
benchmark = EmbeddingBenchmark()
texts = ["AI 기술 발전", "머신러닝 알고리즘", "데이터 분석"]
results = benchmark.benchmark_models(texts)
print(results)
4. Vector Database 비교
다음 세 가지 벡터 데이터베이스를 비교합니다:
4.1 Chroma
가장 간단한 로컬 테스트용
import chromadb
from chromadb.utils import embedding_functions
class ChromaVectorDB:
    """Thin wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a chromadb client and open (or create) *collection_name*.

        The collection is configured with chromadb's
        ``DefaultEmbeddingFunction``.
        """
        self.client = chromadb.Client()
        default_embedder = embedding_functions.DefaultEmbeddingFunction()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=default_embedder,
        )

    def add_documents(self, documents, ids):
        """Add *documents* to the collection under the matching *ids*."""
        self.collection.add(documents=documents, ids=ids)

    def search(self, query, top_k=5):
        """Return the documents matched for *query* (up to *top_k* requested)."""
        response = self.collection.query(query_texts=[query], n_results=top_k)
        # One result list comes back per query text; only one was sent.
        documents_per_query = response['documents']
        return documents_per_query[0]
4.2 Qdrant
고성능, 클라우드 호환
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Small helper around ``QdrantClient`` for one-vector-per-point collections."""

    def __init__(self, host="localhost", port=6333):
        """Connect to the Qdrant server at *host*:*port*."""
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, collection_name, vector_size=384):
        """(Re)create *collection_name* with one unnamed cosine-distance vector.

        Args:
            collection_name: Name of the collection to (re)create.
            vector_size: Dimensionality of the stored vectors.

        NOTE(review): ``recreate_collection`` presumably replaces an existing
        collection of the same name; confirm against the client version in use.
        """
        # Local import keeps this block self-contained; the package is
        # already a dependency of this file.
        from qdrant_client.models import Distance, VectorParams

        # A single *unnamed* vector config. A mapping such as
        # {"vector": {...}} declares a vector *named* "vector", which the
        # plain query vector sent by search() would not match.
        self.client.recreate_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=vector_size,
                distance=Distance.COSINE,
            ),
        )

    def search(self, collection_name, query_vector, top_k=5):
        """Return the payloads of the hits for *query_vector* (limit *top_k*)."""
        results = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
        return [hit.payload for hit in results]
4.3 pgvector
PostgreSQL 확장, 데이터베이스 통합이 용이
import psycopg2
import numpy as np
class PGVectorDB:
    """Document store backed by PostgreSQL with the pgvector extension."""

    def __init__(self, connection_string):
        """Connect using *connection_string* and make sure the table exists."""
        self.conn = psycopg2.connect(connection_string)
        self.create_table_if_not_exists()

    def create_table_if_not_exists(self):
        """Enable pgvector (if needed) and create the ``documents`` table."""
        with self.conn.cursor() as cur:
            # The VECTOR column type only exists once the extension is enabled.
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id UUID PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
        self.conn.commit()

    @staticmethod
    def _to_vector_literal(embedding):
        """Render *embedding* as pgvector's text form, e.g. ``[0.1,0.2]``."""
        return "[" + ",".join(str(float(value)) for value in embedding) + "]"

    def search(self, query_embedding, top_k=5):
        """Return the *top_k* rows most similar to *query_embedding*.

        Args:
            query_embedding: Iterable of numbers (list, tuple, NumPy array).
            top_k: Maximum number of rows to return.

        Returns:
            List of ``(content, similarity)`` rows, best match first, where
            ``similarity`` is cosine similarity (1 - cosine distance).
        """
        # Sent as a text literal and cast server-side, so no client-side
        # type adapter is required for lists or NumPy arrays.
        vector_literal = self._to_vector_literal(query_embedding)
        with self.conn.cursor() as cur:
            # `<=>` is pgvector's cosine-distance operator (`<->` is L2, for
            # which "1 - distance" is not a similarity). Ordering by the
            # distance expression itself lets a vector index be used.
            cur.execute("""
                SELECT content, 1 - (embedding <=> %s::vector) AS similarity
                FROM documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (vector_literal, vector_literal, top_k))
            return cur.fetchall()
5. 전체 RAG 파이프라인 구현
python
import os
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
import numpy as np
from typing import List, Tuple
class CompleteRAG:
def __init__(self, embedding_model="all-MiniLM-L6-v2"):
    """Load the embedding model, connect to Qdrant, and set up the collection.

    Args:
        embedding_model: sentence-transformers model name used to embed
            documents.
    """
    # Embedding model.
    self.embedder = SentenceTransformer(embedding_model)

    # Vector store client and the collection this instance works with.
    self.vector_db = QdrantClient(host="localhost", port=6333)
    self.collection_name = "rag_docs"

    # Prepare the collection before any document is added.
    self.initialize_vector_db()
def initialize_vector_db(self):
    """(Re)create the collection with one unnamed cosine-distance vector.

    The vector size is taken from the loaded embedding model, so a
    non-default ``embedding_model`` gets a collection of matching
    dimensionality; 384 is kept as the fallback when the model does not
    report a dimension.

    NOTE(review): ``recreate_collection`` presumably replaces an existing
    collection of the same name; confirm against the client version in use.
    """
    # Local import keeps this method self-contained; the package is already
    # a dependency of this file.
    from qdrant_client.models import Distance, VectorParams

    dimension = self.embedder.get_sentence_embedding_dimension() or 384

    # A single *unnamed* vector config: points are upserted with a plain
    # "vector" list, which does not match a vector *named* "vector".
    self.vector_db.recreate_collection(
        collection_name=self.collection_name,
        vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),
    )
def add_documents(self, documents: List[Tuple[str, str]]):
"""문서 추가"""
ids = [doc[0] for doc in documents]
contents = [doc[1] for doc in documents]
# 임베딩 생성
embeddings = self.embedder.encode(contents)
# 벡터 DB에 저장
self.vector_db.upsert(
collection_name=self.collection_name,
points=[
{
"id": doc_id,
"vector": embedding.tolist(),
"payload": {"content": content}
}
for doc_id, content, embedding in zip(ids, contents, embeddings)
]
)
def retrieve(self, query: str, top_k: int = 5):
"""검색
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)