RAG 시스템 실전 구축 (v22)
개요
RAG(Retrieval-Augmented Generation)는 대규모 언어 모델(LLM)이 외부 지식을 검색해 활용함으로써 더 정확한 답변을 생성하도록 하는 기법입니다. 이 가이드는 ML 엔지니어와 백엔드 개발자가 실전에서 RAG 시스템을 구축하는 데 필요한 핵심 내용을 다룹니다.
1. RAG 기초 개념
RAG 시스템은 세 단계로 구성됩니다:
- 검색 (Retrieval): 질문에 관련된 문서/청크를 검색
- 증강 (Augmentation): 검색된 정보를 프롬프트에 통합
- 생성 (Generation): LLM이 증강된 프롬프트를 기반으로 답변 생성
기본 구조:
# RAG 루프 구현 예시
class BasicRAG:
    """Minimal retrieve-augment-generate loop.

    The three collaborators are duck-typed:

    * ``embedding_model`` must provide ``encode(question)``.
    * ``vector_db`` must provide ``search(embedding, k=...)`` returning an
      iterable of objects with a ``content`` attribute.
    * ``llm`` must provide ``generate(prompt)``.
    """

    def __init__(self, embedding_model, vector_db, llm):
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        self.llm = llm

    def query(self, question, k=5):
        """Answer *question* using the top-*k* retrieved documents as context.

        Args:
            question: The user question as a string.
            k: Number of documents to request from the vector store.
                Defaults to 5, the value that used to be hard-coded.

        Returns:
            Whatever ``llm.generate`` returns for the augmented prompt.
        """
        # 1. Embed the question.
        query_embedding = self.embedding_model.encode(question)

        # 2. Retrieve the nearest documents.
        relevant_docs = self.vector_db.search(query_embedding, k=k)

        # 3. Build the augmented prompt (one retrieved document per line).
        context = "\n".join(doc.content for doc in relevant_docs)
        prompt = f"질문: {question}\n참고문서: {context}"

        # 4. Generate the answer.
        return self.llm.generate(prompt)
2. 청킹 전략
2.1 의미적 청킹 (Semantic Chunking)
from sentence_transformers import SentenceTransformer
import numpy as np
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity.

    Each sentence is compared (cosine similarity) with the *first* sentence
    of the chunk currently being built; when the similarity drops below the
    threshold a new chunk is started.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def chunk_by_semantic(self, text, threshold=0.75):
        """Split *text* into semantically coherent chunks.

        Args:
            text: Input text. Sentences are delimited by ``". "``.
            threshold: Minimum cosine similarity to the chunk's first
                sentence for a sentence to stay in the same chunk.

        Returns:
            A list of chunk strings. Sentence-ending periods are preserved.
            Empty or whitespace-only input yields an empty list.
        """
        sentences = self._split_sentences(text)
        if not sentences:
            return []

        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = [sentences[0]]
        # The first sentence of the current chunk is the comparison anchor.
        anchor = embeddings[0]

        for sentence, embedding in zip(sentences[1:], embeddings[1:]):
            if self._cosine(anchor, embedding) < threshold:
                # Large semantic shift: close the chunk and start a new one.
                chunks.append(' '.join(current_chunk))
                current_chunk = [sentence]
                anchor = embedding
            else:
                current_chunk.append(sentence)

        chunks.append(' '.join(current_chunk))
        return chunks

    @staticmethod
    def _split_sentences(text):
        """Split on ``". "`` while keeping the period on each sentence."""
        parts = text.split('. ')
        # str.split consumes the delimiter, so give the period back to every
        # piece except the last one (which keeps whatever ending it had).
        sentences = [part + '.' for part in parts[:-1]] + parts[-1:]
        return [sentence for sentence in sentences if sentence.strip()]

    @staticmethod
    def _cosine(a, b):
        """Return cosine similarity of *a* and *b*; 0.0 if either is zero."""
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            # Avoid a NaN result that would silently compare as "not below".
            return 0.0
        return float(np.dot(a, b) / denom)
# Usage example: build a chunker with the default model name, split a short
# two-sentence Korean sample, and print the resulting chunks.
# NOTE: `text` is module-level and is reused by the recursive-chunking example
# further down in this file.
chunker = SemanticChunker()
text = "RAG 시스템은 검색된 정보를 기반으로 LLM이 정확한 답변을 생성합니다. 이 시스템은 외부 지식을 활용하여 질문에 대한 답변을 제공합니다."
chunks = chunker.chunk_by_semantic(text)
print(chunks)
2.2 재귀적 청킹 (Recursive Chunking)
class RecursiveChunker:
    """Fixed-size character chunker with overlap between neighbours.

    Despite the name, the implementation is an iterative sliding window:
    each chunk is at most ``chunk_size`` characters long and shares its
    first ``overlap`` characters with the end of the previous chunk.
    """

    def __init__(self, chunk_size=512, overlap=64):
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Split *text* into overlapping windows.

        Args:
            text: The string to split.

        Returns:
            A list of substrings of *text*, in order. Empty input yields an
            empty list.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is
                not in ``[0, chunk_size)``; such settings cannot make
                forward progress.
        """
        if self.chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        # Each window starts this many characters after the previous one, so
        # consecutive windows share exactly `overlap` characters.
        step = self.chunk_size - self.overlap
        text_length = len(text)

        chunks = []
        start = 0
        while start < text_length:
            end = min(start + self.chunk_size, text_length)
            chunks.append(text[start:end])
            if end == text_length:
                # The tail has been emitted; stepping back by `overlap` here
                # would re-enter the loop forever.
                break
            start += step
        return chunks
# Usage example: chunk the module-level sample `text` (defined in the semantic
# chunking example earlier in this file) with a 256-character window and a
# 32-character overlap. Rebinds the module-level name `chunks`.
recursive_chunker = RecursiveChunker(chunk_size=256, overlap=32)
chunks = recursive_chunker.chunk_recursive(text)
3. 임베딩 모델 선택 및 비교
3.1 모델 비교 코드
import time
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
class EmbeddingBenchmark:
    """Measure encode latency and output size for several embedding models."""

    # Models loaded when the caller does not supply its own list.
    DEFAULT_MODEL_NAMES = (
        'all-MiniLM-L6-v2',
        'all-mpnet-base-v2',
        'BAAI/bge-small-en',
    )

    def __init__(self, model_names=None):
        """Load every model to be benchmarked.

        Args:
            model_names: Optional iterable of SentenceTransformer model
                names. Defaults to ``DEFAULT_MODEL_NAMES``.
        """
        names = self.DEFAULT_MODEL_NAMES if model_names is None else model_names
        self.models = {name: SentenceTransformer(name) for name in names}

    def benchmark_model(self, model_name, texts):
        """Encode *texts* with one model and report timing and size.

        Args:
            model_name: Key into ``self.models``.
            texts: Sequence of strings to encode.

        Returns:
            A dict with keys ``model``, ``inference_time`` (seconds),
            ``embedding_size`` (vector length, 0 if nothing was encoded) and
            ``memory_usage`` (bytes, computed as count * size * 4).

        Raises:
            KeyError: If *model_name* was not loaded.
        """
        model = self.models[model_name]

        # perf_counter is monotonic and high-resolution, unlike wall-clock
        # time.time(), so it is the right clock for measuring intervals.
        start_time = time.perf_counter()
        embeddings = model.encode(texts)
        elapsed = time.perf_counter() - start_time

        count = len(embeddings)
        # Guard the empty case: indexing embeddings[0] would raise.
        embedding_size = len(embeddings[0]) if count else 0

        return {
            'model': model_name,
            'inference_time': elapsed,
            'embedding_size': embedding_size,
            # 4 bytes per value -- assumes float32 embeddings.
            'memory_usage': count * embedding_size * 4,
        }

    def compare_models(self, test_texts):
        """Run ``benchmark_model`` for every loaded model, in load order."""
        return [
            self.benchmark_model(model_name, test_texts)
            for model_name in self.models
        ]
# Model comparison: load the benchmark models, encode three short Korean
# sample sentences with each, and print one summary line per model
# (latency in seconds, embedding dimension, size in MB).
benchmark = EmbeddingBenchmark()
test_texts = ["RAG 시스템의 기본 개념", "검색과 증강의 역할", "생성 모델의 작동 방식"]
results = benchmark.compare_models(test_texts)
for result in results:
    print(f"{result['model']}: {result['inference_time']:.3f}초, "
          f"{result['embedding_size']}차원, "
          f"{result['memory_usage']/1024/1024:.2f}MB")
4. 벡터 데이터베이스 비교
4.1 Chroma vs Qdrant vs pgvector
# Chroma 사용 예시
import chromadb
from chromadb.utils import embedding_functions
class ChromaDB:
    """Thin wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Open (or create) *collection_name* using Chroma's default embedder."""
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=embedding_functions.DefaultEmbeddingFunction(),
        )

    def add_documents(self, documents, ids):
        """Add raw text documents under the given ids.

        Args:
            documents: List of document strings.
            ids: List of ids, one per document.

        Raises:
            ValueError: If *documents* and *ids* differ in length.
        """
        # Fail early with a clear message instead of deep inside the client.
        if len(documents) != len(ids):
            raise ValueError(
                f"documents and ids must have the same length "
                f"({len(documents)} != {len(ids)})"
            )
        self.collection.add(documents=documents, ids=ids)

    def search(self, query, top_k=5):
        """Return the documents matched for a single text *query*.

        Args:
            query: Query text; embedded by the collection itself.
            top_k: Maximum number of results to request.

        Returns:
            The document list for the (only) query, i.e.
            ``results['documents'][0]``.
        """
        results = self.collection.query(query_texts=[query], n_results=top_k)
        return results['documents'][0]
# Qdrant 사용 예시
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition
class QdrantDB:
    """Thin wrapper around a Qdrant client for creating and filling collections."""

    def __init__(self, host="localhost", port=6333):
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, collection_name, vector_size=384):
        """(Re)create *collection_name* for cosine-distance vectors.

        Args:
            collection_name: Name of the collection.
            vector_size: Dimensionality of the stored vectors.

        NOTE(review): this calls ``recreate_collection``, which by its name
        replaces any existing collection of the same name -- confirm that
        dropping existing data is intended before using this in production.
        """
        self.client.recreate_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_size, distance="Cosine"),
        )

    def add_documents(self, collection_name, documents, ids):
        """Upsert pre-embedded documents into *collection_name*.

        Args:
            collection_name: Target collection.
            documents: Iterable of objects exposing ``embedding`` (the
                vector) and ``content`` (the text stored as payload).
            ids: Iterable of point ids, paired positionally with
                *documents*.
        """
        points = [
            {
                # `point_id` rather than `id`, to avoid shadowing the builtin.
                "id": point_id,
                "vector": doc.embedding,
                "payload": {"text": doc.content},
            }
            for point_id, doc in zip(ids, documents)
        ]
        self.client.upsert(collection_name=collection_name, points=points)
# pgvector 사용 예시
import psycopg2
from psycopg2.extras import Json
class PGVectorDB:
    """Store and query embeddings in PostgreSQL via the pgvector extension."""

    def __init__(self, connection_string):
        self.conn = psycopg2.connect(connection_string)

    def create_table(self):
        """Create the extension, the ``rag_documents`` table and its index.

        All statements are idempotent (``IF NOT EXISTS``) and are committed
        together at the end.
        """
        with self.conn.cursor() as cur:
            # The VECTOR column type only exists once the extension is enabled.
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS rag_documents (
                    id UUID PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
            # Cosine operator class: queries must order by `<=>` to use it.
            cur.execute("""
                CREATE INDEX IF NOT EXISTS idx_embedding ON rag_documents
                USING ivfflat (embedding vector_cosine_ops)
            """)
        self.conn.commit()

    def search(self, query_embedding, top_k=5):
        """Return the *top_k* rows closest to *query_embedding* (cosine).

        Args:
            query_embedding: Iterable of numbers (list, tuple, array, ...).
            top_k: Maximum number of rows to return.

        Returns:
            A list of ``(id, content, embedding)`` rows, nearest first.
        """
        # Plain Python floats so the driver can adapt the value regardless of
        # whether the caller passed a list or an array-like.
        vector = [float(value) for value in query_embedding]
        with self.conn.cursor() as cur:
            # `<=>` is cosine distance, matching the vector_cosine_ops index
            # (the previous `<->` is L2 distance). The explicit ::vector cast
            # is needed because a Python list is sent as a SQL array.
            cur.execute("""
                SELECT id, content, embedding
                FROM rag_documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (vector, top_k))
            return cur.fetchall()
5. 전체 RAG 파이프라인 코드
python
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
import json
class RAGPipeline:
def __init__(self, embedding_model_name="all-MiniLM-L6-v2"):
self.embedding_model = SentenceTransformer(embedding_model_name)
self.documents = []
self.embeddings = []
self.id_to_doc = {}
def add_documents(self, documents: List[Dict[str, Any]]
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)