RAG 시스템 실전 구축 (v25)
1. RAG 기본 개념: 검색 → 보강 → 생성 루프
RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 외부 지식 저장소에서 질문과 관련된 정보를 검색한 뒤, 그 결과를 LLM 입력에 덧붙여 응답 생성을 보강하는 아키텍처입니다. 이 시스템은 다음과 같은 루프를 따릅니다:
사용자 질문 → 검색 → 보강 → 생성
검색: 질문과 유사한 문서 조각을 벡터 데이터베이스에서 찾습니다.
보강: 검색된 문서와 질문을 함께 LLM 입력으로 제공합니다.
생성: LLM은 보강된 입력을 바탕으로 질문에 대한 응답을 생성합니다.
2. 청킹 전략: 의미적, 재귀적, 에이전트 기반
2.1 의미적 청킹 (Semantic Chunking)
import nltk
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
class SemanticChunker:
    """Group sentences into chunks, optionally breaking where the topic shifts.

    Sentences are produced by ``sent_tokenize`` and packed greedily into
    chunks. A new chunk is started when adding the next sentence would push
    the running character count past ``chunk_size`` or, if a
    ``similarity_threshold`` is supplied, when the next sentence is not
    similar enough to the previous one.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def chunk_document(self, text, chunk_size=512, similarity_threshold=None):
        """Split ``text`` into a list of chunk strings.

        Args:
            text: The document text to split.
            chunk_size: Budget per chunk, measured as the sum of the sentence
                lengths in characters. The single spaces used to join
                sentences are not counted, so a chunk may be slightly longer.
            similarity_threshold: Optional cosine-similarity cut-off. When
                given, a chunk boundary is also placed between two adjacent
                sentences whose embeddings score below this value. When
                ``None`` (the default) only the size budget is used and no
                embeddings are computed.

        Returns:
            The chunks in document order; an empty list when the tokenizer
            yields no sentences. A sentence longer than ``chunk_size`` is
            kept whole as its own chunk.
        """
        sentences = sent_tokenize(text)
        if not sentences:
            return []

        # Encoding is the expensive step, so it only runs when the result is
        # actually consulted.
        embeddings = None
        if similarity_threshold is not None:
            # Unit-length vectors make the dot product equal to the cosine
            # similarity. NOTE(review): relies on encode() returning a 2-D
            # array-like whose rows support the ``@`` operator.
            embeddings = self.model.encode(sentences, normalize_embeddings=True)

        chunks = []
        current_chunk = []
        current_length = 0
        for index, sentence in enumerate(sentences):
            over_budget = current_length + len(sentence) > chunk_size
            topic_shift = (
                embeddings is not None
                and index > 0
                and float(embeddings[index - 1] @ embeddings[index])
                < similarity_threshold
            )
            # Only flush a non-empty buffer: otherwise an oversized first
            # sentence would emit an empty-string chunk.
            if current_chunk and (over_budget or topic_shift):
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                current_length = 0
            current_chunk.append(sentence)
            current_length += len(sentence)

        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks
2.2 재귀적 청킹 (Recursive Chunking)
import re
class RecursiveChunker:
    """Split text into paragraph-based chunks no longer than a size limit.

    Paragraphs (separated by a blank line) that already fit are kept as-is.
    Longer paragraphs are split near their midpoint, preferring a space as
    the cut position, and both halves are split again until every piece
    fits. Consecutive pieces of a split paragraph share a small overlap.
    """

    def __init__(self, max_chunk_size=1024, overlap_ratio=0.1):
        """Store the chunking limits.

        Args:
            max_chunk_size: Maximum chunk length in characters.
            overlap_ratio: Fraction of ``max_chunk_size`` that is repeated at
                the start of the piece following a split.
        """
        self.max_chunk_size = max_chunk_size
        self.overlap_ratio = overlap_ratio

    def chunk_recursive(self, text):
        """Return the chunks of ``text`` in document order.

        Paragraphs are obtained with ``text.split('\\n\\n')``; empty
        paragraphs are passed through unchanged.
        """
        chunks = []
        for paragraph in text.split('\n\n'):
            if len(paragraph) <= self.max_chunk_size:
                chunks.append(paragraph)
            else:
                chunks.extend(self._recursive_split(paragraph))
        return chunks

    def _recursive_split(self, text):
        """Split ``text`` into pieces of at most ``max_chunk_size`` characters.

        Returns a list of pieces in order. A text that already fits (or is a
        single character and cannot be divided) is returned as one piece.
        """
        limit = self.max_chunk_size
        # A negative ratio simply means "no overlap".
        overlap_size = max(0, int(limit * self.overlap_ratio))

        pieces = []
        remainder = text
        # The right-hand side is handled iteratively so the depth of the
        # recursion does not grow with the number of chunks.
        while len(remainder) > limit and len(remainder) > 1:
            mid = len(remainder) // 2
            # Look for a space within 50 characters of the midpoint. The
            # lower bound is clamped so that (a) a negative start never wraps
            # around to the end of the string and (b) the space carried over
            # inside the previous overlap is not picked again.
            window_start = max(overlap_size + 1, mid - 50)
            split_point = remainder.rfind(' ', window_start, mid + 50)
            if split_point == -1:
                split_point = mid

            # The left half may itself still be too long, so split it too.
            pieces.extend(self._recursive_split(remainder[:split_point]))

            # Cap the overlap below the split point so the remainder always
            # gets strictly shorter, whatever overlap_ratio is configured.
            overlap = min(overlap_size, split_point - 1)
            # Slice the overlap and the right half as one contiguous span so
            # the separator between them is preserved.
            remainder = remainder[split_point - overlap:].lstrip()

        pieces.append(remainder)
        return pieces
2.3 에이전트 기반 청킹 (Agent-based Chunking)
class AgentChunker:
    """Run one of the chunking strategies and attach metadata to each chunk."""

    def __init__(self, chunker_type='semantic'):
        """Create the underlying chunker.

        Args:
            chunker_type: ``'semantic'`` for ``SemanticChunker`` or
                ``'recursive'`` for ``RecursiveChunker``.

        Raises:
            ValueError: If ``chunker_type`` is not one of the two supported
                values (previously this surfaced later as a missing
                ``chunker`` attribute).
        """
        self.chunker_type = chunker_type
        if chunker_type == 'semantic':
            self.chunker = SemanticChunker()
        elif chunker_type == 'recursive':
            self.chunker = RecursiveChunker()
        else:
            raise ValueError(
                f"Unsupported chunker_type: {chunker_type!r} "
                "(expected 'semantic' or 'recursive')"
            )

    def process_document(self, document, chunk_size=1024):
        """Chunk a document and return one metadata dict per chunk.

        Args:
            document: Either the raw text, or a mapping with a ``'text'``
                entry and an optional ``'source'`` entry.
            chunk_size: Size budget forwarded to the semantic chunker. The
                recursive chunker uses its own ``max_chunk_size`` and does
                not receive this value.

        Returns:
            A list of dicts with the keys ``'id'``, ``'text'``, ``'source'``
            and ``'chunk_index'``. ``'source'`` is ``'unknown'`` when the
            document does not provide one.
        """
        # The chunkers operate on strings while the metadata lookup needs a
        # mapping, so separate the two up front instead of treating one
        # object as both.
        if isinstance(document, dict):
            text = document.get('text', '')
            source = document.get('source', 'unknown')
        else:
            text = document
            source = 'unknown'

        if self.chunker_type == 'semantic':
            chunks = self.chunker.chunk_document(text, chunk_size)
        else:
            chunks = self.chunker.chunk_recursive(text)

        return [
            {
                'id': f'chunk_{i}',
                'text': chunk,
                'source': source,
                'chunk_index': i,
            }
            for i, chunk in enumerate(chunks)
        ]
3. 임베딩 모델 선택과 비교
3.1 임베딩 모델 비교
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
class EmbeddingEvaluator:
    """Compare several sentence-embedding models on a single sentence pair."""

    def __init__(self):
        """Load every model under comparison, keyed by its model name."""
        model_names = (
            'all-MiniLM-L6-v2',
            'all-mpnet-base-v2',
            'sentence-t5-base',
        )
        self.models = {name: SentenceTransformer(name) for name in model_names}

    def evaluate_models(self, test_sentences):
        """Score each model on the first two entries of ``test_sentences``.

        All of ``test_sentences`` is encoded with each model, and the cosine
        similarity between the first and second embedding is recorded.

        Returns:
            A dict mapping model name to that similarity value.
        """

        def first_pair_similarity(model):
            # Quick sanity check: how close does this model place the first
            # two sentences?
            vectors = model.encode(test_sentences)
            return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

        return {
            label: first_pair_similarity(model)
            for label, model in self.models.items()
        }
# Usage example: score one pair of paraphrased sentences with every model.
test_sentences = [
    "The quick brown fox jumps over the lazy dog",
    "A fast brown fox leaps over a sleepy dog"
]
evaluator = EmbeddingEvaluator()
similarities = evaluator.evaluate_models(test_sentences=test_sentences)
4. 벡터 데이터베이스 비교: Chroma, Qdrant, pgvector, Milvus
4.1 Chroma 벡터 데이터베이스
import chromadb
from chromadb import Client
from chromadb.config import Settings
class ChromaVectorDB:
    """Thin wrapper around a single Chroma collection using cosine distance."""

    def __init__(self, collection_name="rag_collection"):
        """Open (or create) the collection named ``collection_name``.

        The client is created with Chroma's default settings rather than
        ``Settings(chroma_db_impl="duckdb")``.
        NOTE(review): that explicit setting is believed to be the default on
        older chromadb releases and to be rejected as legacy configuration on
        newer ones - confirm against the installed chromadb version.
        """
        self.client = Client()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )

    def add_documents(self, documents, ids):
        """Add ``documents`` to the collection under the matching ``ids``."""
        self.collection.add(
            documents=documents,
            ids=ids,
        )

    def search(self, query, limit=5):
        """Query the collection with the text ``query``.

        Returns:
            The raw result of ``collection.query`` for at most ``limit``
            matches.
        """
        return self.collection.query(
            query_texts=[query],
            n_results=limit,
        )
4.2 Qdrant 벡터 데이터베이스
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Thin wrapper around a single Qdrant collection using cosine distance."""

    def __init__(self, host="localhost", port=6333,
                 collection_name="rag_collection", vector_size=384):
        """Connect to Qdrant and remember the collection settings.

        Args:
            host: Qdrant server host.
            port: Qdrant server port.
            collection_name: Name of the collection all methods operate on.
            vector_size: Dimensionality of the stored vectors. It must match
                the embedding model in use; the default of 384 is the value
                the original snippet hard-coded.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name
        self.vector_size = vector_size

    def create_collection(self):
        """(Re)create the collection via ``recreate_collection``."""
        # Imported locally so this block does not depend on edits to the
        # file-level import list.
        from qdrant_client.models import Distance, VectorParams

        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.vector_size,
                distance=Distance.COSINE,
            ),
        )

    def add_documents(self, documents, ids, metadata=None):
        """Upsert one point per document.

        Args:
            documents: Mappings with an ``"embedding"`` entry, a ``"text"``
                entry and an optional ``"source"`` entry.
            ids: Point ids, paired with ``documents`` by position.
                NOTE(review): Qdrant is understood to accept only unsigned
                integers or UUID strings as ids - confirm before passing
                arbitrary strings.
            metadata: Optional sequence of extra payload dicts, indexed by
                document position; its entries override ``"text"`` and
                ``"source"`` on key collisions.
        """
        from qdrant_client.models import PointStruct

        points = [
            PointStruct(
                id=point_id,
                vector=doc["embedding"],
                payload={
                    "text": doc["text"],
                    "source": doc.get("source", ""),
                    **(metadata[i] if metadata else {}),
                },
            )
            for i, (point_id, doc) in enumerate(zip(ids, documents))
        ]
        # Nothing to store: skip the round trip to the server.
        if not points:
            return
        self.client.upsert(
            collection_name=self.collection_name,
            points=points,
        )

    def search(self, query_vector, limit=5):
        """Return the raw ``client.search`` result for ``query_vector``.

        At most ``limit`` matches are requested.
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=limit,
        )
5. 전체 RAG 파이프라인 코드
python
import numpy as np
from sentence_transformers import SentenceTransformer
from chromadb import Client
from chromadb.config import Settings
class RAGPipeline:
def __init__(self):
    """Set up the embedding model and the Chroma collection.

    The client is created with Chroma's default settings rather than
    ``Settings(chroma_db_impl="duckdb")``.
    NOTE(review): that explicit setting is believed to be the default on
    older chromadb releases and to be rejected as legacy configuration on
    newer ones - confirm against the installed chromadb version.
    """
    # Embedding model used for the stored chunks.
    self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    self.client = Client()
    self.collection = self.client.get_or_create_collection(
        name="rag_collection",
        metadata={"hnsw:space": "cosine"},
    )
def add_document(self, text, doc_id, metadata=None):
    """Chunk, embed and store one document.

    Args:
        text: Full text of the document.
        doc_id: Identifier of the document. Chunk ids are built as
            ``"{doc_id}_{i}"`` and ``doc_id`` is stored as the ``"source"``
            metadata entry.
        metadata: Optional dict merged into every chunk's metadata; its
            entries override ``"source"`` and ``"chunk_index"`` on key
            collisions.

    Whitespace-only chunks (for example from consecutive blank lines) are
    dropped before embedding, and nothing is written when no chunk remains.
    """
    # Chunking
    chunker = RecursiveChunker()
    chunks = [
        chunk for chunk in chunker.chunk_recursive(text) if chunk.strip()
    ]
    if not chunks:
        return

    # Embedding
    embeddings = self.embedding_model.encode(chunks)

    # Store in the vector DB
    extra = metadata or {}
    indices = range(len(chunks))
    self.collection.add(
        documents=chunks,
        embeddings=embeddings.tolist(),
        ids=[f"{doc_id}_{i}" for i in indices],
        metadatas=[
            {"source": doc_id, "chunk_index": i, **extra} for i in indices
        ],
    )
def retrieve(self, query, top_k=3):
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)