RAG 시스템 실전 구축 (v30)
1. RAG 시스템 기본 구조
RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 검색과 생성을 결합한 기법으로, 대규모 언어 모델(LLM)이 외부 지식을 검색해 활용함으로써 더 정확하고 최신 정보를 반영한 답변을 생성할 수 있게 해줍니다.
핵심 루프 구성
# RAG 기본 루프 구현
def rag_pipeline(query, vector_db, llm):
    """Run one retrieve-augment-generate pass for ``query``.

    Args:
        query: The user question; passed to the vector store and embedded
            in the prompt.
        vector_db: Object exposing ``search(query, k=...)``.
        llm: Object exposing ``generate(prompt)``.

    Returns:
        Whatever ``llm.generate`` returns for the augmented prompt.
    """
    # Retrieval: ask the store for the five best matches.
    hits = vector_db.search(query, k=5)

    # Augmentation: prepend the formatted hits to the question.
    # format_context is expected to be provided elsewhere in the file.
    prompt = "Context: {}\n\nQuestion: {}".format(format_context(hits), query)

    # Generation: hand the combined prompt to the language model.
    return llm.generate(prompt)
2. 청킹 전략 비교
2.1 의미적 청킹 (Semantic Chunking)
from sentence_transformers import SentenceTransformer
import numpy as np
def semantic_chunking(text, model, threshold=0.7):
    """Split ``text`` into chunks of semantically similar sentences.

    The text is split on ``'. '``. Each sentence is compared (cosine
    similarity) with the embedding of the FIRST sentence of the chunk being
    built; it joins that chunk when the similarity exceeds ``threshold`` and
    otherwise starts a new chunk.

    Args:
        text: Input text.
        model: Object exposing ``encode(list_of_sentences)`` that returns one
            vector per sentence.
        threshold: Similarity above which a sentence stays in the current
            chunk.

    Returns:
        A list of chunk strings; empty when ``text`` has no non-blank
        sentence.
    """
    pieces = text.split('. ')
    # str.split drops the '. ' separator, so put the period back on every
    # piece except the last one; otherwise chunks lose their punctuation.
    sentences = [piece + '.' for piece in pieces[:-1]] + [pieces[-1]]
    sentences = [sentence for sentence in sentences if sentence.strip()]
    if not sentences:
        return []

    embeddings = np.asarray(model.encode(sentences), dtype=float)

    chunks = []
    current_chunk = [sentences[0]]
    # The anchor is the embedding of the chunk's first sentence; it is only
    # replaced when a new chunk starts.
    anchor = embeddings[0]

    for sentence, embedding in zip(sentences[1:], embeddings[1:]):
        # Cosine similarity computed inline: no cosine_similarity helper is
        # imported by this file. A zero-norm vector counts as dissimilar.
        denominator = np.linalg.norm(anchor) * np.linalg.norm(embedding)
        similarity = float(np.dot(anchor, embedding) / denominator) if denominator else 0.0

        if similarity > threshold:
            current_chunk.append(sentence)
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [sentence]
            anchor = embedding

    chunks.append(' '.join(current_chunk))
    return chunks
2.2 재귀적 청킹 (Recursive Chunking)
def recursive_chunking(text, chunk_size=500, overlap=50):
    """Split ``text`` into fixed-size windows that overlap their neighbours.

    Despite the name, this is an iterative sliding window: each chunk starts
    ``chunk_size - overlap`` characters after the previous one.

    Args:
        text: Input string.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of trailing characters of one chunk repeated at the
            start of the next; must be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings covering ``text``; empty for empty input.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    total = len(text)
    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        chunks.append(text[start:end])
        # Stop once the window reaches the end of the text. Stepping back by
        # ``overlap`` here would keep ``start`` below ``total`` forever.
        if end == total:
            break
        start += step
    return chunks
2.3 에이전트 기반 청킹
class AgentBasedChunker:
    """Chunker that follows document structure.

    ``identify_sections`` and ``split_section`` are called by
    ``chunk_with_structure`` but are not defined in the visible part of the
    class; they must be supplied elsewhere (e.g. by a subclass).
    """

    def __init__(self, model):
        """Store ``model`` on the instance."""
        self.model = model

    def chunk_with_structure(self, text):
        """Return chunks of ``text`` built section by section.

        Sections whose ``'content'`` is longer than 1000 characters are
        passed to ``split_section`` and its results are added individually;
        shorter sections are kept whole.
        """
        pieces = []
        for part in self.identify_sections(text):
            body = part['content']
            if len(body) <= 1000:
                # Short enough to keep as a single chunk.
                pieces.append(body)
                continue
            # Long section: break it down further.
            pieces.extend(self.split_section(body))
        return pieces
3. 임베딩 모델 선택 및 비교
3.1 모델 선택 기준
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
class EmbeddingBenchmark:
    """Compare several sentence-embedding models on the same texts.

    ``calculate_similarities``, ``get_model_size`` and ``benchmark_inference``
    are used by ``evaluate_models`` but are not defined in the visible part
    of the class.
    """

    def __init__(self):
        """Instantiate every candidate model up front."""
        candidates = (
            ('all-MiniLM-L6-v2', 'all-MiniLM-L6-v2'),
            ('all-mpnet-base-v2', 'all-mpnet-base-v2'),
            ('BAAI/bge-small-en', 'BAAI/bge-small-en'),
            ('sentence-bert', 'paraphrase-multilingual-MiniLM-L12-v2'),
        )
        self.models = {
            label: SentenceTransformer(checkpoint)
            for label, checkpoint in candidates
        }

    def evaluate_models(self, texts, reference_embeddings):
        """Score each model against ``reference_embeddings``.

        Returns:
            A dict keyed by model label; each value holds
            ``'avg_similarity'``, ``'model_size'`` and ``'inference_time'``.
        """
        report = {}
        for label, encoder in self.models.items():
            # Embed the texts with this model.
            vectors = encoder.encode(texts)
            # Compare against the reference embeddings.
            scores = self.calculate_similarities(vectors, reference_embeddings)
            report[label] = dict(
                avg_similarity=np.mean(scores),
                model_size=self.get_model_size(encoder),
                inference_time=self.benchmark_inference(encoder, texts),
            )
        return report
# Example: compare the models registered in EmbeddingBenchmark.
# NOTE(review): sample_texts and reference_embeddings are not defined in the
# visible part of this file; they must exist before these lines run.
benchmark = EmbeddingBenchmark()
results = benchmark.evaluate_models(sample_texts, reference_embeddings)
3.2 최적화된 임베딩 생성
class OptimizedEmbedder:
    """Wrap a SentenceTransformer and embed texts in explicit batches."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load ``model_name`` and set its maximum sequence length to 512."""
        self.model = SentenceTransformer(model_name)
        self.model.max_seq_length = 512

    def embed_batch(self, texts, batch_size=32):
        """Embed ``texts`` in slices of ``batch_size`` and concatenate them.

        Each slice is encoded with the progress bar disabled and tensor
        output requested; the per-slice tensors are joined along dimension 0.
        An empty ``texts`` yields no slices, so ``torch.cat`` receives an
        empty list.
        """
        encoded_slices = [
            self.model.encode(
                texts[offset:offset + batch_size],
                show_progress_bar=False,
                convert_to_tensor=True,
            )
            for offset in range(0, len(texts), batch_size)
        ]
        return torch.cat(encoded_slices, dim=0)
4. 벡터 데이터베이스 비교
4.1 Chroma vs Qdrant vs pgvector vs Milvus
# Chroma 구현
import chromadb
from chromadb import Client
class ChromaVectorDB:
    """Thin wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a client and get or create ``collection_name``.

        The collection is requested with ``"hnsw:space": "cosine"`` metadata.
        """
        self.client = Client()
        collection_options = {"hnsw:space": "cosine"}
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata=collection_options,
        )

    def add_documents(self, documents, embeddings, ids):
        """Forward documents, their embeddings and ids to the collection."""
        payload = dict(documents=documents, embeddings=embeddings, ids=ids)
        self.collection.add(**payload)

    def search(self, query_embedding, n_results=5):
        """Return the documents matched for a single query embedding.

        The collection is queried with a one-element batch, so only the
        first entry of the returned ``'documents'`` list is handed back.
        """
        response = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
        )
        matched_documents = response['documents'][0]
        return matched_documents
# Qdrant 구현
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Thin wrapper around a Qdrant client."""

    def __init__(self, host="localhost", port=6333):
        """Connect to the Qdrant server at ``host``:``port``."""
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, collection_name, vector_size):
        """Call ``recreate_collection`` with a cosine-distance vector config.

        NOTE(review): presumably this replaces an existing collection of the
        same name — confirm against the qdrant-client documentation.
        """
        vector_settings = {"size": vector_size, "distance": "Cosine"}
        self.client.recreate_collection(
            collection_name=collection_name,
            vectors_config=vector_settings,
        )

    def search(self, query_vector, collection_name, limit=5):
        """Return the payload of each hit for ``query_vector``."""
        hits = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit,
        )
        payloads = []
        for hit in hits:
            payloads.append(hit.payload)
        return payloads
# pgvector 구현
import psycopg2
from psycopg2.extras import execute_values
class PGVectorDB:
    """Nearest-neighbour search over a ``documents`` table using pgvector."""

    def __init__(self, connection_string):
        """Open a psycopg2 connection using ``connection_string``."""
        self.conn = psycopg2.connect(connection_string)

    def search(self, query_vector, limit=5):
        """Return the ``limit`` rows closest to ``query_vector``.

        Args:
            query_vector: Iterable of numbers; its length is expected to
                match the dimension of the ``embedding`` column.
            limit: Maximum number of rows to return.

        Returns:
            A list of ``(content, distance)`` tuples ordered by ascending
            ``<->`` distance.
        """
        # Send the vector as pgvector's text form ("[x,y,...]") with an
        # explicit ::vector cast. A plain Python list is adapted by psycopg2
        # as an SQL ARRAY, which the <-> operator does not accept directly.
        vector_literal = "[" + ",".join(str(float(value)) for value in query_vector) + "]"

        with self.conn.cursor() as cursor:
            # The distance is computed in the SELECT list and aliased; the
            # original query selected a bare ``distance`` column that the
            # statement never produced.
            cursor.execute(
                """
                SELECT content, embedding <-> %s::vector AS distance
                FROM documents
                ORDER BY distance
                LIMIT %s
                """,
                (vector_literal, limit),
            )
            return cursor.fetchall()
4.2 성능 비교 테스트
import time
import numpy as np
def benchmark_vector_dbs(documents, query_embeddings, k=5):
    """Time up to 100 searches against Chroma and Qdrant.

    Args:
        documents: Currently unused. NOTE(review): nothing is inserted into
            either store here, so they must already hold data for the
            timings to be meaningful.
        query_embeddings: Sequence of query vectors; only the first 100 are
            used.
        k: Number of results requested per query.

    Returns:
        A dict mapping ``'chroma'`` and ``'qdrant'`` to elapsed seconds.
    """
    queries = query_embeddings[:100]
    results = {}

    # Chroma. perf_counter is used instead of time.time because it is a
    # monotonic, high-resolution clock unaffected by system clock changes.
    chroma_db = ChromaVectorDB()
    started = time.perf_counter()
    for query in queries:
        chroma_db.search(query, k)
    results['chroma'] = time.perf_counter() - started

    # Qdrant, queried against the "rag_collection" collection.
    qdrant_db = QdrantVectorDB()
    started = time.perf_counter()
    for query in queries:
        qdrant_db.search(query, "rag_collection", k)
    results['qdrant'] = time.perf_counter() - started

    return results
5. 전체 RAG 파이프라인 구현
python
import asyncio
from typing import List, Dict, Any
from sentence_transformers import SentenceTransformer
import numpy as np
class FullRAGPipeline:
def __init__(self,
embedding_model_name='all-MiniLM-L6-v2',
vector_db_type='chroma',
llm_model='gpt-3.5-turbo'):
# 임베딩 모델
self.embedder = SentenceTransformer(embedding_model_name)
# 벡터 DB
if vector_db_type == 'chroma':
self.vector_db = ChromaVectorDB()
elif vector_db_type == 'qdrant
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)