RAG 시스템 실전 구축 (v10)
1. RAG 기초 개념: 검색 → 보완 → 생성 루프
RAG(Retrieval-Augmented Generation)는 LLM이 외부 지식 베이스에서 검색한 문서를 근거로 응답을 생성하도록 하는 아키텍처입니다. 기본 루프는 다음과 같습니다:
- 검색(Retrieval): 사용자 질문을 벡터로 변환하고, 벡터 데이터베이스에서 관련 문서 검색
- 보완(Augmentation): 검색된 문서들을 프롬프트에 포함시켜 LLM 입력 생성
- 생성(Generation): LLM이 보완된 프롬프트 기반으로 응답 생성
# 간단한 RAG 루프 예시
class SimpleRAG:
    """Minimal retrieve-augment-generate pipeline.

    The three collaborators are duck-typed; this class relies only on the
    calls made below:

    * ``embedding_model.encode(question)`` produces the query vector,
    * ``vector_db.search(query_embedding, k=...)`` yields mappings that each
      carry a ``'content'`` entry,
    * ``llm.generate(prompt)`` produces the answer.
    """

    def __init__(self, embedding_model, vector_db, llm):
        """Store the embedding model, vector store and LLM used by ``query``."""
        self.embedding = embedding_model
        self.vector_db = vector_db
        self.llm = llm

    def query(self, question, k=5):
        """Answer *question* using the top-*k* retrieved documents as context.

        Args:
            question: The user question. It is embedded for retrieval and
                also inserted verbatim into the prompt.
            k: Number of documents requested from the vector store.
                Defaults to 5, the value that was previously hard-coded.

        Returns:
            Whatever ``llm.generate`` returns for the assembled prompt.
        """
        # 1. Embed the question.
        query_embedding = self.embedding.encode(question)

        # 2. Retrieve the k nearest documents.
        relevant_docs = self.vector_db.search(query_embedding, k=k)

        # 3. Augment the prompt with the retrieved text, then generate.
        context = "\n".join(doc['content'] for doc in relevant_docs)
        prompt = f"Context: {context}\n\nQuestion: {question}"
        return self.llm.generate(prompt)
2. 청킹 전략: 의미적, 재귀적, 에이전트 기반
청킹은 문서를 의미 단위로 나누는 과정이며, 아래의 세 가지 전략은 각각 장단점이 있습니다:
# 의미적 청킹 (Semantic Chunking)
import numpy as np
from sentence_transformers import SentenceTransformer
class SemanticChunker:
    """Group consecutive sentences into chunks bounded by a character budget.

    NOTE(review): the sentence-transformer loaded in ``__init__`` is not used
    by ``chunk``; splitting is purely length-based on the ``'. '`` separator.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-transformer identified by *model_name*."""
        self.model = SentenceTransformer(model_name)

    def chunk(self, text, chunk_size=512):
        """Split *text* into chunks of at most *chunk_size* characters.

        Sentences are obtained by splitting on ``'. '`` and are never cut, so
        a single sentence longer than *chunk_size* becomes a chunk on its own.

        Args:
            text: The document to split.
            chunk_size: Maximum length of a chunk, measured on the joined
                string (sentences plus the ``'. '`` separators between them).

        Returns:
            A list of chunk strings; empty when *text* is empty.
        """
        if not text:
            return []

        separator = '. '
        chunks = []
        current_chunk = []
        current_length = 0

        for sentence in text.split(separator):
            # Joining adds a separator before every sentence except the first
            # one in a chunk, so count it toward the budget.
            added = len(sentence) + (len(separator) if current_chunk else 0)

            # Only flush a non-empty chunk: an oversized first sentence must
            # not produce a leading empty string.
            if current_chunk and current_length + added > chunk_size:
                chunks.append(separator.join(current_chunk))
                current_chunk = [sentence]
                current_length = len(sentence)
            else:
                current_chunk.append(sentence)
                current_length += added

        if current_chunk:
            chunks.append(separator.join(current_chunk))
        return chunks
# 재귀적 청킹 (Recursive Chunking)
class RecursiveChunker:
    """Fixed-size sliding-window chunker with overlap between neighbours."""

    def __init__(self, chunk_size=512, overlap=50):
        """Remember the window length and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk(self, text):
        """Split *text* into windows of at most ``chunk_size`` characters.

        Each window starts ``chunk_size - overlap`` characters after the
        previous one, so adjacent chunks share exactly ``overlap`` characters.
        The last chunk may be shorter.

        Args:
            text: The string to split.

        Returns:
            A list of chunk strings; empty when *text* is empty.

        Raises:
            ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
                negative or not smaller than ``chunk_size``. Such settings
                would prevent the window from advancing.
        """
        if self.chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        step = self.chunk_size - self.overlap
        chunks = []
        start = 0
        while start < len(text):
            end = min(start + self.chunk_size, len(text))
            chunks.append(text[start:end])
            # Stop once the window reaches the end of the text; stepping back
            # by the overlap here would re-emit the tail forever.
            if end == len(text):
                break
            start += step
        return chunks
# 에이전트 청킹 (Agentic Chunking)
class AgenticChunker:
    """Structure-aware chunker: split on Markdown headings, then by size."""

    def __init__(self, chunk_size=512):
        """Remember the maximum chunk length in characters."""
        self.chunk_size = chunk_size

    def chunk(self, text):
        """Split *text* into heading-delimited sections of bounded length.

        Sections longer than ``chunk_size`` are halved repeatedly until every
        piece fits.

        Args:
            text: The document to split.

        Returns:
            A list of chunk strings in document order.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if self.chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")

        chunks = []
        for section in self._parse_sections(text):
            if len(section) > self.chunk_size:
                # Oversized sections are split recursively.
                chunks.extend(self._recursive_split(section, self.chunk_size))
            else:
                chunks.append(section)
        return chunks

    def _parse_sections(self, text):
        """Return the sections of *text*, one per Markdown heading.

        A heading is one to six ``#`` characters at the start of a line,
        followed by whitespace. Text before the first heading is kept as its
        own section, and text without any heading is returned as a single
        section, so no content is dropped.
        """
        import re

        heading_starts = [
            match.start()
            for match in re.finditer(r'^#{1,6}\s', text, re.MULTILINE)
        ]
        if not heading_starts:
            return [text] if text.strip() else []

        sections = []
        preamble = text[:heading_starts[0]]
        if preamble.strip():
            sections.append(preamble.rstrip('\n'))

        boundaries = heading_starts + [len(text)]
        for begin, end in zip(boundaries, boundaries[1:]):
            section = text[begin:end]
            if end < len(text):
                # Drop the newline that separates this section from the
                # next heading.
                section = section[:-1]
            sections.append(section)
        return sections

    def _recursive_split(self, text, chunk_size):
        """Halve *text* until every piece is at most *chunk_size* long.

        Raises:
            ValueError: If *chunk_size* is smaller than 1, which would make
                the recursion endless.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")
        if len(text) <= chunk_size:
            return [text]
        mid = len(text) // 2
        return (
            self._recursive_split(text[:mid], chunk_size)
            + self._recursive_split(text[mid:], chunk_size)
        )
3. 임베딩 모델 선택과 비교
임베딩 모델은 의미 표현의 품질에 직접 영향을 미칩니다:
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
# 다양한 임베딩 모델 비교
class EmbeddingBenchmark:
    """Score several sentence-transformer models against one reference vector."""

    def __init__(self):
        """Load every benchmarked model, keyed by a short display name."""
        checkpoints = {
            'all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
            'all-mpnet-base-v2': 'all-mpnet-base-v2',
            'sentence-t5': 'sentence-t5-base',
            'bert-base-nli': 'bert-base-nli-mean-tokens',
        }
        self.models = {
            label: SentenceTransformer(checkpoint)
            for label, checkpoint in checkpoints.items()
        }

    def compare_models(self, texts, reference_embedding):
        """Return the mean dot product of each model's embeddings with a reference.

        Args:
            texts: Inputs passed to each model's ``encode``.
            reference_embedding: Array multiplied (transposed) with the
                encoded texts. The score is a raw dot product, not a
                normalised cosine similarity.

        Returns:
            A dict mapping model name to ``{'avg_similarity': ..., 'model': ...}``.
        """
        def mean_dot(encoder):
            # Encode the texts, then average their dot products.
            vectors = encoder.encode(texts)
            return np.mean(np.dot(vectors, reference_embedding.T))

        return {
            label: {'avg_similarity': mean_dot(encoder), 'model': encoder}
            for label, encoder in self.models.items()
        }
# 임베딩 성능 비교 예시
# Constructing the benchmark runs EmbeddingBenchmark.__init__, which
# instantiates every model in its table.
# NOTE(review): `benchmark` is not referenced again in the lines visible here;
# confirm whether a compare_models() call was intended.
benchmark = EmbeddingBenchmark()
# Sample sentences passed to model.encode() below.
test_texts = [
"The quick brown fox jumps over the lazy dog",
"A fast brown fox leaps over a sleepy dog",
"Machine learning is a subset of artificial intelligence"
]
# Practical usage example
model = SentenceTransformer('all-mpnet-base-v2') # recommended model
embeddings = model.encode(test_texts)
4. 벡터 데이터베이스 비교
다양한 벡터 데이터베이스의 장단점 분석:
python
# Chroma 벡터 DB
import chromadb
from chromadb import Client
class ChromaVectorDB:
    """Thin wrapper around one Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a Chroma client and get or create *collection_name*."""
        self.client = Client()
        self.collection = self.client.get_or_create_collection(collection_name)

    def add_documents(self, documents, ids):
        """Add *documents* to the collection under the given *ids*."""
        self.collection.add(documents=documents, ids=ids)

    def search(self, query_vector, k=5):
        """Query the collection with *query_vector* and return up to *k* hits.

        Returns:
            A list of ``{'id': ..., 'content': ...}`` dicts built from the
            first entry of the response's ``'ids'`` and ``'documents'`` lists.
        """
        response = self.collection.query(
            query_embeddings=[query_vector],
            n_results=k,
        )
        hit_ids = response['ids'][0]
        hit_texts = response['documents'][0]

        hits = []
        for doc_id, text in zip(hit_ids, hit_texts):
            hits.append({'id': doc_id, 'content': text})
        return hits
# Qdrant 벡터 DB
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Thin wrapper around one Qdrant collection."""

    def __init__(self, host="localhost", port=6333,
                 collection_name="rag_collection", vector_size=768):
        """Connect to Qdrant and remember the collection settings.

        Args:
            host: Qdrant server host.
            port: Qdrant server port.
            collection_name: Collection used by every method. Defaults to
                the previously hard-coded ``"rag_collection"``.
            vector_size: Dimensionality declared in ``create_collection``.
                Defaults to the previously hard-coded 768; set it to the
                output size of the embedding model in use.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name
        self.vector_size = vector_size

    def create_collection(self):
        """Create the collection with cosine distance and ``vector_size`` dims."""
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config={"size": self.vector_size, "distance": "Cosine"},
        )

    def add_documents(self, documents, ids):
        """Upsert *documents* under the given *ids*.

        Args:
            documents: Mappings that each provide an ``"embedding"`` vector
                and a ``"content"`` string.
            ids: Point ids, paired positionally with *documents*.
        """
        points = [
            {
                "id": point_id,
                "vector": doc["embedding"],
                "payload": {"content": doc["content"]},
            }
            for point_id, doc in zip(ids, documents)
        ]
        self.client.upsert(
            collection_name=self.collection_name,
            points=points,
        )

    def search(self, query_vector, k=5):
        """Return up to *k* hits for *query_vector*.

        Returns:
            A list of ``{'id': ..., 'content': ...}`` dicts built from each
            hit's ``id`` and its ``payload['content']``.
        """
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k,
        )
        return [
            {'id': point.id, 'content': point.payload['content']}
            for point in results
        ]
# pgvector (PostgreSQL 확장)
import psycopg2
from psycopg2.extras import Json
class PGVectorDB:
def __init__(self, connection_string):
self.conn = psycopg2.connect(connection_string)
self.create_table()
def create_table(self):
with self.conn.cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS rag_documents (
id TEXT PRIMARY KEY,
content TEXT,
embedding VECTOR(768)
)
""")
cur.execute("CREATE INDEX IF NOT EXISTS idx_embedding ON rag_documents USING ivfflat (embedding vector_cosine_ops)")
self.conn.commit()
def add_documents(self, documents, ids):
with self.conn.cursor() as cur:
for id, content in zip(ids, documents):
embedding = self.get_embedding(content)
cur.execute(
"INSERT INTO rag_documents (id, content, embedding) VALUES (%s, %s, %s)",
(id, content, embedding)
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)