RAG 시스템 실전 구축 (v20)
1. RAG 시스템 기본 개념
Retrieval-Augmented Generation(RAG, 검색 증강 생성)은 대규모 언어 모델(LLM)에 외부 지식을 결합하는 아키텍처입니다. 이 시스템은 질의마다 다음 세 단계를 순서대로 수행합니다:
- 검색 (Retrieval): 사용자 질문과 관련된 문서 조각들 검색
- 증강 (Augmentation): 검색된 문서 조각을 사용자 질문과 결합하여 프롬프트 구성
- 생성 (Generation): LLM이 증강된 프롬프트를 기반으로 답변 생성
class RAGPipeline:
    """Minimal retrieve-augment-generate pipeline.

    The three collaborators are duck-typed. This class only relies on:

    * ``embedding_model.encode(query)`` returning a query embedding,
    * ``vector_db.search(embedding, k=...)`` returning an iterable of
      mappings that each expose a ``'text'`` key,
    * ``llm.generate(prompt)`` returning the answer.
    """

    def __init__(self, embedding_model, vector_db, llm):
        """Store the embedding model, vector store and LLM used per query."""
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        self.llm = llm

    def process_query(self, query, k=5):
        """Answer *query* using the top-*k* retrieved documents as context.

        Args:
            query: The user question; it is embedded and also inserted
                verbatim into the prompt.
            k: Number of documents requested from the vector store.
                Defaults to 5, the value that used to be hard-coded.

        Returns:
            Whatever ``llm.generate`` returns for the assembled prompt.
        """
        # 1. Embed the question.
        query_embedding = self.embedding_model.encode(query)

        # 2. Retrieve the k most similar documents.
        relevant_docs = self.vector_db.search(query_embedding, k=k)

        # 3. Build the prompt: retrieved texts (one per line), then the question.
        context = "\n".join(doc['text'] for doc in relevant_docs)
        prompt = f"Context: {context}\n\nQuestion: {query}"

        # 4. Generate the answer.
        return self.llm.generate(prompt)
2. 문서 청킹 전략
2.1 의미적 청킹 (Semantic Chunking)
의미 단위로 문서를 분할하여 의미적 관련성을 유지합니다.
import numpy as np
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding cluster."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model named *model_name*."""
        self.model = SentenceTransformer(model_name)

    def chunk_by_semantic(self, text, chunk_size=512):
        """Split *text* into chunks of adjacent sentences sharing a cluster.

        Sentences are obtained by splitting on ``'. '``, embedded, and
        clustered with KMeans (at most 10 clusters). A new chunk starts
        whenever the cluster label changes between neighbouring sentences,
        so sentence order is preserved.

        Args:
            text: The document text. Empty or whitespace-only text yields
                an empty list.
            chunk_size: Accepted for interface compatibility; this
                implementation does not enforce a size limit.

        Returns:
            A list of chunk strings, each made of sentences re-joined
            with ``'. '``.
        """
        if not text:
            return []

        # Drop blank fragments (e.g. after a trailing '. ') so they are
        # neither embedded nor clustered.
        sentences = [s for s in text.split('. ') if s.strip()]

        # With zero or one sentence there is nothing to cluster.
        if len(sentences) <= 1:
            return sentences

        embeddings = self.model.encode(sentences)

        # Semantic grouping via clustering.
        # NOTE(review): no random_state is passed, so cluster labels may
        # differ between runs; confirm whether determinism is required.
        kmeans = KMeans(n_clusters=min(len(sentences), 10))
        kmeans.fit(embeddings)
        labels = kmeans.labels_

        chunks = []
        current_chunk = []
        current_label = labels[0]
        for sentence, label in zip(sentences, labels):
            # A label change closes the running chunk.
            if label != current_label and current_chunk:
                chunks.append('. '.join(current_chunk))
                current_chunk = []
                current_label = label
            current_chunk.append(sentence)

        if current_chunk:
            chunks.append('. '.join(current_chunk))
        return chunks
2.2 재귀적 청킹 (Recursive Chunking)
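텍스트를 고정 길이(chunk_size)의 청크로 순차 분할하되, 인접한 청크가 overlap 길이만큼 겹치도록 하여 청크 경계에서 문맥이 끊기지 않게 합니다. (아래 구현은 구분자를 단계적으로 적용하는 재귀 분할이 아니라 슬라이딩 윈도우 방식입니다.)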
class RecursiveChunker:
    """Fixed-size character chunker whose neighbouring chunks overlap."""

    def __init__(self, chunk_size=1024, overlap=128):
        """Store the window length and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Slice *text* into windows of ``chunk_size`` characters.

        Each window starts ``chunk_size - overlap`` characters after the
        previous one, so consecutive chunks share ``overlap`` characters.
        The final chunk may be shorter than ``chunk_size``.

        Args:
            text: The string to split. An empty string yields ``[]``.

        Returns:
            The list of chunk strings in document order.

        Raises:
            ValueError: If ``overlap`` is negative or not smaller than
                ``chunk_size``; the window could then never advance.
        """
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError(
                "overlap must satisfy 0 <= overlap < chunk_size "
                f"(got chunk_size={self.chunk_size}, overlap={self.overlap})"
            )

        step = self.chunk_size - self.overlap
        text_len = len(text)
        chunks = []
        start = 0
        while start < text_len:
            end = min(start + self.chunk_size, text_len)
            chunks.append(text[start:end])
            # Stop once the window reaches the end of the text. Stepping
            # back by `overlap` from here would keep `start` below
            # `text_len` forever.
            if end == text_len:
                break
            start += step
        return chunks
2.3 에이전트 기반 청킹 (Agentic Chunking)
문서 구조를 고려하여 의미 있는 단위로 청킹합니다.
class AgenticChunker:
    """Paragraph-based chunker that packs paragraphs up to a size budget."""

    def __init__(self):
        """Define the heading patterns associated with this chunker."""
        # Markdown-style heading patterns. They are stored here but are
        # not consulted by chunk_by_structure.
        self.section_patterns = [
            r'##\s+(.+)',   # level-2 heading
            r'#\s+(.+)',    # level-1 heading
            r'###\s+(.+)',  # level-3 heading
        ]

    def chunk_by_structure(self, text, max_chunk_size=1000):
        """Pack blank-line-separated paragraphs into chunks.

        Paragraphs are appended to the current chunk until adding the next
        one would push the summed paragraph lengths past *max_chunk_size*;
        the chunk is then closed and the paragraph starts a new one. The
        ``'\\n\\n'`` separators are not counted towards the budget, and a
        single paragraph longer than the budget is emitted whole.

        Args:
            text: The document text. Empty text yields ``[]``.
            max_chunk_size: Character budget per chunk. Defaults to 1000,
                the value that used to be hard-coded.

        Returns:
            A list of chunk strings, paragraphs re-joined with ``'\\n\\n'``.
        """
        if not text:
            return []

        chunks = []
        current_chunk = []
        current_size = 0
        for para in text.split('\n\n'):
            if current_chunk and current_size + len(para) > max_chunk_size:
                # Budget exceeded: close the running chunk and start anew.
                chunks.append('\n\n'.join(current_chunk))
                current_chunk = [para]
                current_size = len(para)
            else:
                current_chunk.append(para)
                current_size += len(para)

        if current_chunk:
            chunks.append('\n\n'.join(current_chunk))
        return chunks
3. 임베딩 모델 선택 및 비교
3.1 모델 비교 테스트
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import time
class EmbeddingBenchmark:
    """Compare several sentence-embedding models on latency and size."""

    def __init__(self):
        """Load every model to be compared, keyed by model name."""
        self.models = {
            'all-MiniLM-L6-v2': SentenceTransformer('all-MiniLM-L6-v2'),
            'all-mpnet-base-v2': SentenceTransformer('all-mpnet-base-v2'),
            'sentence-t5-xxl': SentenceTransformer('sentence-t5-xxl'),
        }

    def benchmark_models(self, test_queries, test_documents):
        """Embed the queries and documents with each model and time it.

        Args:
            test_queries: Query strings passed to ``model.encode``.
            test_documents: Document strings passed to ``model.encode``.

        Returns:
            A dict keyed by model name. Each value holds:

            * ``'latency'``: seconds spent encoding both inputs and
              computing the similarity matrix,
            * ``'similarity_matrix'``: query-by-document cosine
              similarities,
            * ``'size'``: the model's embedding dimension.
        """
        results = {}
        for name, model in self.models.items():
            # perf_counter is monotonic, so the interval cannot be skewed
            # by wall-clock adjustments.
            started = time.perf_counter()

            query_embeddings = model.encode(test_queries)
            doc_embeddings = model.encode(test_documents)
            similarities = cosine_similarity(query_embeddings, doc_embeddings)

            elapsed = time.perf_counter() - started

            results[name] = {
                'latency': elapsed,
                'similarity_matrix': similarities,
                # Ask the model for its output dimension directly.
                'size': model.get_sentence_embedding_dimension(),
            }
        return results
# Usage example: benchmark every configured model on a tiny query/document
# set and print one summary line per model.
test_queries = ["Python 언어의 장점", "AI 기술 발전"]
test_docs = ["Python은 간단하고 읽기 쉬운 언어입니다", "AI는 인공지능 기술입니다"]

benchmark = EmbeddingBenchmark()
results = benchmark.benchmark_models(test_queries, test_docs)

print("모델 성능 비교:")
for model in results:
    metrics = results[model]
    print(f"{model}: {metrics['latency']:.2f}초, 크기: {metrics['size']}")
4. 벡터 데이터베이스 비교
4.1 Chroma vs Qdrant vs pgvector vs Milvus
python
# Chroma
import chromadb
from chromadb.config import Settings
class ChromaVectorDB:
    """Vector store backed by a Chroma collection named ``rag_collection``."""

    def __init__(self):
        """Create a Chroma client and get or create the collection."""
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection("rag_collection")

    def add_documents(self, documents, embeddings):
        """Add *documents* with their *embeddings* to the collection.

        Ids are consecutive integers rendered as strings, continuing from
        the collection's current size so that repeated calls do not reuse
        the ids ``"0".."n-1"`` of an earlier batch.

        NOTE(review): continuing from ``count()`` assumes entries are
        never deleted from the collection; confirm, or switch to UUIDs.

        Args:
            documents: Sequence of document strings.
            embeddings: One embedding per document, in the same order.
        """
        if not documents:
            return  # nothing to add

        first_id = self.collection.count()
        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=[str(i) for i in range(first_id, first_id + len(documents))],
        )

    def search(self, query_embedding, k=5):
        """Return up to *k* stored documents for *query_embedding*.

        Args:
            query_embedding: A single query vector.
            k: Maximum number of results requested.

        Returns:
            A list of ``{"text": ..., "score": ...}`` dicts. ``score`` is
            taken from the ``'distances'`` field of the query result.
            NOTE(review): presumably a distance, so lower means closer.
        """
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k,
        )
        # The query is batched; index 0 is the single query sent above.
        return [
            {"text": doc, "score": score}
            for doc, score in zip(results['documents'][0], results['distances'][0])
        ]
# Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Vector store backed by an in-memory Qdrant collection."""

    def __init__(self, vector_size=384):
        """Create an in-memory client and (re)create ``rag_collection``.

        Args:
            vector_size: Dimension of the stored vectors. Defaults to 384,
                the value that used to be hard-coded.
        """
        from qdrant_client.models import Distance, VectorParams

        self.client = QdrantClient(":memory:")  # in-memory storage
        self.collection_name = "rag_collection"
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
        )
        # The collection was just recreated, so point ids start from 0.
        self._next_id = 0

    def add_documents(self, documents, embeddings):
        """Upsert *documents* with their *embeddings* as new points.

        Point ids continue from the last id handed out by this instance,
        so a second call appends instead of overwriting points 0..n-1.

        Args:
            documents: Sequence of document strings, stored in the point
                payload under ``"text"``.
            embeddings: One vector per document, in the same order.
                Objects exposing ``tolist()`` are converted to lists.
        """
        from qdrant_client.models import PointStruct

        first_id = getattr(self, "_next_id", 0)
        points = [
            PointStruct(
                id=first_id + offset,
                vector=embedding.tolist() if hasattr(embedding, "tolist") else list(embedding),
                payload={"text": doc},
            )
            for offset, (doc, embedding) in enumerate(zip(documents, embeddings))
        ]
        if not points:
            return  # nothing to upsert

        self.client.upsert(self.collection_name, points)
        self._next_id = first_id + len(points)

    def search(self, query_embedding, k=5):
        """Return up to *k* stored documents for *query_embedding*.

        Args:
            query_embedding: The query vector.
            k: Maximum number of hits requested.

        Returns:
            A list of ``{"text": ..., "score": ...}`` dicts built from each
            hit's payload text and score.
        """
        hits = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=k,
        )
        return [{"text": hit.payload['text'], "score": hit.score} for hit in hits]
# pgvector
import psycopg2
import numpy as np
class PGVectorDB:
    """Vector store backed by a PostgreSQL table with a ``VECTOR`` column.

    NOTE(review): no ``search`` method is defined on this class, unlike
    the other vector stores in this file; confirm whether one exists
    elsewhere before using it with a retrieval pipeline.
    """

    def __init__(self, connection_string):
        """Connect with *connection_string* and ensure the table exists."""
        self.conn = psycopg2.connect(connection_string)
        self.create_table()

    def create_table(self):
        """Create ``rag_documents`` and its cosine ivfflat index if missing.

        NOTE(review): the ``VECTOR`` type presumably requires the pgvector
        extension to be installed in the database already; confirm.
        """
        with self.conn.cursor() as cur:
            cur.execute("""
CREATE TABLE IF NOT EXISTS rag_documents (
id SERIAL PRIMARY KEY,
content TEXT,
embedding VECTOR(384)
)
""")
            cur.execute("""
CREATE INDEX IF NOT EXISTS embedding_idx
ON rag_documents USING ivfflat (embedding vector_cosine_ops)
""")
        self.conn.commit()

    def add_documents(self, documents, embeddings):
        """Insert *documents* with their *embeddings* and commit.

        All rows are inserted in one transaction: it is committed after
        the last insert, or rolled back if any insert raises, so the
        connection is left usable for later calls.

        Args:
            documents: Sequence of document strings.
            embeddings: One vector per document, in the same order.
                Objects exposing ``tolist()`` are converted to lists;
                other iterables are copied into a list.

        Raises:
            Exception: Any error raised while inserting is re-raised
                after the rollback.
        """
        try:
            with self.conn.cursor() as cur:
                for doc, embedding in zip(documents, embeddings):
                    vector = embedding.tolist() if hasattr(embedding, "tolist") else list(embedding)
                    cur.execute(
                        "INSERT INTO rag_documents (content, embedding) VALUES (%s, %s)",
                        (doc, vector),
                    )
            # Without a commit the inserted rows are never persisted.
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)