RAG 시스템 실전 구축 (v18)
1. RAG의 핵심 개념
RAG(Retrieval-Augmented Generation)는 외부 문서에서 검색한 정보를 프롬프트에 더해 대규모 언어 모델(LLM)의 답변 정확도와 근거성을 높이는 아키텍처입니다. 다음 세 단계로 작동합니다:
- Retrieval: 사용자 질문과 관련된 문서 조각을 검색합니다.
- Augmentation: 검색된 문서 조각을 사용자 질문과 결합하여 LLM에 전달할 프롬프트(컨텍스트)를 구성합니다.
- Generation: 증강된 컨텍스트를 기반으로 답변을 생성합니다.
이 루프는 다음과 같은 순서로 작동합니다:
# RAG 루프의 기본 구조
def rag_loop(query, vector_db, embedding_model, llm):
    """Run one retrieve -> augment -> generate pass for ``query``.

    Args:
        query: The user question. It is passed to the vector store search,
            to ``augment_context`` and to the LLM.
        vector_db: Object exposing ``search(query, top_k=...)``.
        embedding_model: Accepted as part of the signature; this function
            does not use it.
        llm: Object exposing ``generate(context, query)``.

    Returns:
        Whatever ``llm.generate`` returns.
    """
    # Steps 1 and 2: fetch the top five matches and build the augmented
    # context from them.
    # NOTE(review): ``augment_context`` is not defined in the visible part of
    # this file - confirm where it comes from.
    context = augment_context(query, vector_db.search(query, top_k=5))
    # Step 3: generate the answer from the augmented context.
    return llm.generate(context, query)
2. Chunking 전략
2.1 Semantic Chunking
의미 기반으로 문서를 분할하여 의미 단위를 유지합니다:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.cluster import KMeans
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding distance."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

    def split_sentences(self, text):
        """Split ``text`` into stripped, non-empty sentences.

        A boundary is whitespace that follows ``.``, ``!`` or ``?`` (the
        punctuation stays with its sentence), or any run of line breaks.

        Args:
            text: The raw text to split.

        Returns:
            A list of sentence strings; empty when ``text`` has no content.
        """
        import re  # local import: keeps this class self-contained

        parts = re.split(r'(?<=[.!?])\s+|\n+', text)
        return [part.strip() for part in parts if part.strip()]

    def chunk_by_semantic_distance(self, text, threshold=0.7):
        """Chunk ``text`` wherever adjacent sentences drift apart in meaning.

        Every sentence is embedded, and the cosine distance between each
        sentence and the one before it is computed. A new chunk starts when
        that distance is greater than ``threshold``; otherwise the sentence
        is appended to the current chunk. The result is deterministic for a
        given model and input.

        Args:
            text: The document text.
            threshold: Cosine distance above which a new chunk is started.

        Returns:
            A list of chunk strings in document order. Texts with fewer than
            two sentences are returned without calling the model.
        """
        sentences = self.split_sentences(text)
        if len(sentences) < 2:
            # Nothing to compare; also avoids encoding an empty batch.
            return sentences

        vectors = np.asarray(self.model.encode(sentences), dtype=float)
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        # Guard against division by zero for all-zero embeddings.
        norms[norms == 0.0] = 1.0
        unit_vectors = vectors / norms
        # distances[i] is the cosine distance between sentence i and i + 1.
        distances = 1.0 - np.sum(unit_vectors[:-1] * unit_vectors[1:], axis=1)

        chunks = [sentences[0]]
        for sentence, distance in zip(sentences[1:], distances):
            if distance > threshold:
                chunks.append(sentence)
            else:
                chunks[-1] += " " + sentence
        return chunks
# Usage example: build a chunker with the default model name and chunk a
# placeholder document string.
chunker = SemanticChunker()
chunks = chunker.chunk_by_semantic_distance("문서 내용...", threshold=0.7)
2.2 Recursive Chunking
문단 단위로 먼저 나누고, 청크 크기를 넘는 문단은 다시 문장 단위로 분할합니다:
import re
class RecursiveChunker:
    """Split text by paragraph, then by sentence, then by raw characters."""

    # Sentence boundary: whitespace that follows '.', '!' or '?'. The
    # look-behind keeps the punctuation attached to its sentence.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')

    def __init__(self, chunk_size=512, overlap=50):
        """Store the chunking limits.

        Args:
            chunk_size: Maximum length of a chunk, in characters.
            overlap: Number of trailing characters of a chunk to repeat at
                the start of the next chunk cut from the same paragraph.

        Raises:
            ValueError: If ``chunk_size`` is less than 1 or ``overlap`` is
                negative.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")
        if overlap < 0:
            raise ValueError("overlap must not be negative")
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Split ``text`` into chunks of at most ``chunk_size`` characters.

        Paragraphs are separated by blank lines. A paragraph that fits is
        kept whole; a longer one is split into sentences that are packed
        greedily, and a sentence that is itself too long is cut by
        characters. Blank paragraphs produce no chunk.

        Args:
            text: The document text.

        Returns:
            A list of non-empty chunk strings in document order.
        """
        chunks = []
        for para in text.split('\n\n'):
            para = para.strip()
            if not para:
                continue
            if len(para) <= self.chunk_size:
                chunks.append(para)
            else:
                chunks.extend(self._split_paragraph(para))
        return chunks

    def _split_paragraph(self, para):
        """Pack the sentences of one oversized paragraph into chunks."""
        # Cap the overlap below half the chunk size so that every chunk still
        # has room for new content, whatever ``overlap`` the caller passed.
        carry = min(self.overlap, (self.chunk_size - 1) // 2)
        # Room left for new text after the overlap prefix and its separator.
        budget = self.chunk_size - carry - (1 if carry else 0)

        units = []
        for sentence in self._SENTENCE_BOUNDARY.split(para):
            sentence = sentence.strip()
            if len(sentence) <= self.chunk_size:
                if sentence:
                    units.append(sentence)
                continue
            # Last resort: the sentence alone exceeds the limit, so cut it
            # into fixed-width pieces.
            for start in range(0, len(sentence), budget):
                piece = sentence[start:start + budget].strip()
                if piece:
                    units.append(piece)

        chunks = []
        current = ""
        for unit in units:
            candidate = f"{current} {unit}" if current else unit
            if len(candidate) <= self.chunk_size:
                current = candidate
                continue
            chunks.append(current)
            tail = current[-carry:].strip() if carry else ""
            seeded = f"{tail} {unit}" if tail else unit
            # Overlap is best effort: drop it when it would push the new
            # chunk past the size limit.
            current = seeded if len(seeded) <= self.chunk_size else unit
        if current:
            chunks.append(current)
        return chunks
2.3 Agentic Chunking
LLM(에이전트)이 문맥을 보고 의미 있는 분할 지점을 스스로 결정하는 방식입니다. 아래 코드는 개념 설명을 위한 단순화된 예시로, 실제로는 단어 단위로 길이 제한에 맞춰 나누기만 합니다:
class AgenticChunker:
    """Greedy word-packing chunker used as a simplified example."""

    def __init__(self, chunk_size=512):
        """Store the chunk limit.

        Args:
            chunk_size: Maximum chunk length, in characters.
        """
        self.chunk_size = chunk_size

    def chunk_with_entity_detection(self, text):
        """Pack whitespace-separated words into chunks of limited length.

        Despite its name, this simplified example performs no entity
        detection: it only splits on whitespace and fills each chunk until
        the next word would exceed ``chunk_size``.

        Args:
            text: The document text.

        Returns:
            A list of non-empty chunk strings. A single word longer than
            ``chunk_size`` is kept whole in its own chunk.
        """
        chunks = []
        current_words = []
        current_length = 0  # always equals len(' '.join(current_words))
        for word in text.split():
            # A separator space is needed unless the chunk is still empty.
            projected = current_length + len(word) + (1 if current_words else 0)
            if current_words and projected > self.chunk_size:
                chunks.append(' '.join(current_words))
                current_words = [word]
                current_length = len(word)
            else:
                current_words.append(word)
                current_length = projected
        if current_words:
            chunks.append(' '.join(current_words))
        return chunks
3. 임베딩 모델 선택
3.1 모델 비교
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np
class EmbeddingBenchmark:
    """Load several sentence-embedding models and compare them."""

    # Models loaded when the caller does not pass ``model_names``.
    # NOTE(review): confirm that 'sentence-t5-3b' resolves to a published
    # model; the Sentence-T5 checkpoints appear to be named
    # sentence-t5-base/-large/-xl/-xxl.
    DEFAULT_MODEL_NAMES = (
        'all-MiniLM-L6-v2',
        'all-mpnet-base-v2',
        'sentence-t5-3b',
    )

    def __init__(self, model_names=None):
        """Load every requested model up front.

        Args:
            model_names: Iterable of names passed to ``SentenceTransformer``.
                Defaults to ``DEFAULT_MODEL_NAMES``.
        """
        names = self.DEFAULT_MODEL_NAMES if model_names is None else model_names
        self.models = {name: SentenceTransformer(name) for name in names}

    def benchmark_model(self, texts, model_name):
        """Return the embeddings of ``texts`` from one loaded model.

        Args:
            texts: The texts to encode.
            model_name: Key of the model in ``self.models``.

        Returns:
            Whatever the model's ``encode`` returns for ``texts``.

        Raises:
            KeyError: If ``model_name`` was not loaded.
        """
        try:
            model = self.models[model_name]
        except KeyError:
            raise KeyError(
                f"unknown model {model_name!r}; loaded: {sorted(self.models)}"
            ) from None
        return model.encode(texts)

    def compare_models(self, test_texts):
        """Time ``encode`` on every loaded model.

        Args:
            test_texts: The texts to encode with each model.

        Returns:
            A dict keyed by model name. Each value holds ``'time'`` (elapsed
            seconds), ``'size'`` (number of embeddings returned) and
            ``'dimensions'`` (length of the first embedding, or 0 when no
            embeddings were returned).
        """
        import time  # local import: the file imports ``time`` only later on

        results = {}
        for name, model in self.models.items():
            # perf_counter is monotonic, so the interval cannot go negative.
            started = time.perf_counter()
            embeddings = model.encode(test_texts)
            elapsed = time.perf_counter() - started
            count = len(embeddings)
            results[name] = {
                'time': elapsed,
                'size': count,
                'dimensions': len(embeddings[0]) if count else 0,
            }
        return results
# Performance comparison: time every model held by the benchmark on three
# short sample sentences and print the per-model results dictionary.
benchmark = EmbeddingBenchmark()
test_data = ["테스트 문장 1", "테스트 문장 2", "테스트 문장 3"]
results = benchmark.compare_models(test_data)
print(results)
3.2 최적 모델 선택 가이드
- all-MiniLM-L6-v2: 속도와 정확성의 균형
- all-mpnet-base-v2: 더 높은 정확성
- sentence-t5-3b: 대규모 문서 처리
4. 벡터 데이터베이스 비교
4.1 Chroma 비교
import chromadb
from chromadb.config import Settings
class ChromaDBManager:
    """Small wrapper around a persistent Chroma collection named ``docs``."""

    def __init__(self, persist_directory="./chroma_db"):
        """Open (or create) the persistent client and its collection.

        Args:
            persist_directory: Path passed to ``chromadb.PersistentClient``.
        """
        self.client = chromadb.PersistentClient(path=persist_directory)
        self.collection = self.client.get_or_create_collection("docs")

    def add_documents(self, documents, embeddings, ids):
        """Add documents with caller-supplied embeddings and ids.

        Args:
            documents: The document texts.
            embeddings: One embedding per document.
            ids: One id per document.
        """
        self.collection.add(
            documents=documents,
            embeddings=embeddings,
            ids=ids,
        )

    def search(self, query, top_k=5, query_embedding=None):
        """Return the best matches for a query.

        ``add_documents`` stores embeddings computed by the caller, so a
        text-only query (which Chroma embeds with the collection's own
        embedding function) can end up in a different vector space. Pass
        ``query_embedding`` from the same model to search with it instead.

        Args:
            query: The query text; used when ``query_embedding`` is None.
            top_k: Number of results to request.
            query_embedding: Optional precomputed embedding of the query.

        Returns:
            A ``(documents, distances)`` tuple for the single query.
        """
        if query_embedding is None:
            results = self.collection.query(
                query_texts=[query],
                n_results=top_k,
            )
        else:
            # Convert array-like inputs (e.g. numpy) to a plain list.
            if hasattr(query_embedding, "tolist"):
                vector = query_embedding.tolist()
            else:
                vector = list(query_embedding)
            results = self.collection.query(
                query_embeddings=[vector],
                n_results=top_k,
            )
        return results['documents'][0], results['distances'][0]
4.2 Qdrant 비교
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue
class QdrantManager:
    """Small wrapper around a Qdrant collection named ``rag_docs``."""

    def __init__(self, host="localhost", port=6333):
        """Connect to the Qdrant server.

        Args:
            host: Host passed to ``QdrantClient``.
            port: Port passed to ``QdrantClient``.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = "rag_docs"

    def create_collection(self, vector_size=384):
        """Recreate the collection with cosine-distance vectors.

        This calls ``recreate_collection``.
        NOTE(review): that call presumably drops any existing collection of
        the same name - confirm before running against real data.

        Args:
            vector_size: Dimensionality of the stored vectors.
        """
        vector_params = VectorParams(size=vector_size, distance="Cosine")
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=vector_params,
        )

    def search(self, query_vector, top_k=5):
        """Return up to ``top_k`` matches for ``query_vector``.

        Args:
            query_vector: The query embedding.
            top_k: Maximum number of results.

        Returns:
            Whatever ``QdrantClient.search`` returns.
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
4.3 pgvector 비교
import psycopg2
from psycopg2.extras import Json
import numpy as np
class PGVectorManager:
    """Small wrapper around a PostgreSQL ``documents`` table using pgvector."""

    def __init__(self, connection_string):
        """Open the connection and a cursor.

        Args:
            connection_string: DSN passed to ``psycopg2.connect``.
        """
        self.conn = psycopg2.connect(connection_string)
        self.cursor = self.conn.cursor()

    @staticmethod
    def _to_vector_literal(vector):
        """Format a sequence of numbers as pgvector text input ``[v1,v2,...]``."""
        values = vector.tolist() if hasattr(vector, "tolist") else vector
        return "[" + ",".join(repr(float(value)) for value in values) + "]"

    def create_table(self, dimensions=384):
        """Create the ``documents`` table if it does not exist.

        The ``vector`` extension is enabled first, because the ``VECTOR``
        column type is unavailable without it.

        Args:
            dimensions: Size of the embedding column.
        """
        # DDL cannot take bind parameters; int() guarantees that only a
        # number is interpolated into the statement.
        dimensions = int(dimensions)
        self.cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
        self.cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS documents (
                id UUID PRIMARY KEY,
                content TEXT,
                embedding VECTOR({dimensions})
            )
        """)
        self.conn.commit()

    def search(self, query_vector, top_k=5):
        """Return the ``top_k`` rows closest to ``query_vector``.

        The query vector is sent as text and cast to ``vector``. Without
        the cast, psycopg2 adapts a Python list to an SQL array, for which
        the ``<->`` operator is not defined.
        NOTE(review): ``<->`` is pgvector's Euclidean-distance operator,
        while the other stores in this file use cosine - confirm the metric.

        Args:
            query_vector: Sequence (list, tuple or numpy array) of numbers.
            top_k: Maximum number of rows to return.

        Returns:
            A list of ``(content, embedding)`` rows, nearest first.
        """
        self.cursor.execute("""
            SELECT content, embedding
            FROM documents
            ORDER BY embedding <-> %s::vector
            LIMIT %s
        """, (self._to_vector_literal(query_vector), top_k))
        return self.cursor.fetchall()

    def close(self):
        """Close the cursor and the connection."""
        self.cursor.close()
        self.conn.close()
5. 전체 RAG 파이프라인 구현
python
import numpy as np
from sentence_transformers import SentenceTransformer
from chromadb import PersistentClient
import time
class CompleteRAGPipeline:
def __init__(self, model_name='all-MiniLM-L6-v2'):
    """Load the embedding model and open the persistent ``docs`` collection.

    Args:
        model_name: Name passed to ``SentenceTransformer``.
    """
    self.embedding_model = SentenceTransformer(model_name)
    # The Chroma client stores its data under ./rag_db.
    client = PersistentClient(path="./rag_db")
    self.vector_db = client
    self.collection = client.get_or_create_collection("docs")
def prepare_documents(self, documents):
    """Prepare the documents and chunk them.

    Each document is split with ``RecursiveChunker(chunk_size=512)``, and
    every chunk gets an id of the form ``"<document index>-<chunk index>"``.

    Args:
        documents: Iterable of document strings.

    Returns:
        A ``(chunks, ids)`` tuple of two parallel lists.
    """
    splitter = RecursiveChunker(chunk_size=512)
    chunks = []
    ids = []
    for doc_index, doc in enumerate(documents):
        pieces = splitter.chunk_recursive(doc)
        chunks.extend(pieces)
        ids.extend(
            f"{doc_index}-{piece_index}" for piece_index in range(len(pieces))
        )
    return chunks, ids
def embed_and_store(self, documents):
"""문서 임베딩 및
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)