RAG 시스템 실전 구축 (v19)
1. RAG 시스템 기본 개념
RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 LLM이 답변을 생성하기 전에 외부 지식을 검색해 프롬프트에 덧붙이는 기법으로, 더 정확하고 최신의 정보를 제공할 수 있게 합니다. 핵심 루프는 다음과 같습니다:
사용자 질문 → 검색 (Retrieval) → 증강 (Augmentation) → 생성 (Generation)
RAG 루프 구조
# 단순 RAG 루프 예시
class SimpleRAG:
    """Minimal retrieve -> augment -> generate loop.

    The collaborators are duck-typed:

    * ``embedder.encode(question)`` produces the query embedding.
    * ``vector_db.search(embedding, k=...)`` returns the retrieved documents.
    * ``generator.generate(question, context)`` produces the final answer.
    """

    def __init__(self, embedder, vector_db, generator):
        self.embedder = embedder
        self.vector_db = vector_db
        self.generator = generator

    @staticmethod
    def _doc_text(doc):
        """Return the text of one retrieved document.

        The vector-store wrappers in this file return plain strings from
        ``search``, while other stores may return objects exposing
        ``.content``; both shapes are accepted.
        """
        return doc if isinstance(doc, str) else doc.content

    def query(self, question, k=5):
        """Answer *question* using the top *k* retrieved documents.

        Args:
            question: The user question.
            k: Number of documents to request from the vector store.

        Returns:
            Whatever ``generator.generate`` returns.
        """
        # 1. Embed the question.
        query_embedding = self.embedder.encode(question)
        # 2. Retrieve.
        relevant_docs = self.vector_db.search(query_embedding, k=k)
        # 3. Augment: concatenate the retrieved texts into one context string.
        context = " ".join(self._doc_text(doc) for doc in relevant_docs)
        # 4. Generate.
        return self.generator.generate(question, context)
2. 청킹 전략 (Chunking Strategies)
2.1 의미적 청킹 (Semantic Chunking)
from sentence_transformers import SentenceTransformer
import numpy as np
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def split_into_sentences(self, text):
        """Split *text* into sentences on ``.``, ``!`` or ``?`` followed by whitespace.

        Returns:
            A list of non-empty sentence strings (empty list for blank text).
        """
        import re

        pieces = re.split(r"(?<=[.!?])\s+", text.strip())
        return [piece for piece in pieces if piece]

    def chunk_by_semantic(self, text, threshold=0.75):
        """Split *text* into chunks of semantically similar sentences.

        Each sentence is compared against the FIRST sentence of the chunk
        currently being built; it joins that chunk when the cosine similarity
        is strictly greater than *threshold*, otherwise it starts a new chunk.

        Args:
            text: Input text.
            threshold: Similarity above which a sentence joins the current chunk.

        Returns:
            A list of chunk strings; ``[]`` when *text* contains no sentences.
        """
        sentences = self.split_into_sentences(text)
        if not sentences:
            # Nothing to embed; avoids indexing into an empty list.
            return []

        embeddings = self.model.encode(sentences)
        chunks = []
        current_chunk = [sentences[0]]
        anchor_embedding = embeddings[0]
        for sentence, embedding in zip(sentences[1:], embeddings[1:]):
            if self.cosine_similarity(anchor_embedding, embedding) > threshold:
                current_chunk.append(sentence)
            else:
                chunks.append(" ".join(current_chunk))
                current_chunk = [sentence]
                # The anchor only moves when a new chunk starts.
                anchor_embedding = embedding
        chunks.append(" ".join(current_chunk))
        return chunks

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors *a* and *b*.

        Returns ``0.0`` when either vector has zero norm instead of dividing
        by zero.
        """
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator
2.2 재귀적 청킹 (Recursive Chunking)
class RecursiveChunker:
    """Split text into size-bounded chunks using progressively finer separators."""

    # Tried in order; the empty string means "fall back to fixed-width slices".
    DEFAULT_SEPARATORS = ("\n\n", "\n", " ", "")

    def __init__(self, max_chunk_size=512):
        self.max_chunk_size = max_chunk_size

    def recursive_chunk(self, text, separators=None):
        """Split *text* into chunks of at most ``max_chunk_size`` characters.

        The text is split on the first separator and the parts are greedily
        packed into chunks. A part that is itself too large is split again
        with the next separator, and so on. An empty-string separator slices
        the fragment into fixed-width pieces.

        Args:
            text: Input text.
            separators: Ordered separators, coarsest first. Defaults to
                ``DEFAULT_SEPARATORS``.

        Returns:
            A list of non-empty chunks in document order. Text that already
            fits is returned as a single unmodified chunk; packed chunks are
            whitespace-stripped. A fragment may still exceed the limit if the
            separator list is exhausted without an empty-string fallback.
        """
        if separators is None:
            separators = self.DEFAULT_SEPARATORS
        limit = self.max_chunk_size
        chunks = []

        def emit(piece):
            # Drop chunks that are empty once surrounding whitespace is removed.
            piece = piece.strip()
            if piece:
                chunks.append(piece)

        def split_recursive(fragment, level=0):
            if len(fragment) <= limit or level >= len(separators):
                if fragment.strip():
                    chunks.append(fragment)
                return

            separator = separators[level]
            if separator == "":
                # str.split("") raises ValueError, so slice by width instead.
                step = max(limit, 1)
                for start in range(0, len(fragment), step):
                    emit(fragment[start:start + step])
                return

            current_chunk = ""
            for part in fragment.split(separator):
                if len(part) > limit:
                    # Flush what we have so ordering is kept, then descend
                    # to the next, finer separator for the oversized part.
                    emit(current_chunk)
                    current_chunk = ""
                    split_recursive(part, level + 1)
                elif len(current_chunk) + len(part) + len(separator) <= limit:
                    current_chunk += part + separator
                else:
                    emit(current_chunk)
                    current_chunk = part + separator
            emit(current_chunk)

        split_recursive(text)
        return chunks
2.3 에이전트 기반 청킹 (Agentic Chunking)
class AgenticChunker:
    """Chunk a document along its markdown headings.

    ``prompt_template`` is stored for callers but is not used by the methods
    below; no model is invoked and no summarisation happens here.
    """

    def __init__(self, prompt_template):
        self.prompt_template = prompt_template

    def chunk_with_agent(self, text, max_tokens=1000):
        """Split *text* into heading-based sections, sub-splitting large ones.

        Args:
            text: Input document.
            max_tokens: Maximum chunk size. NOTE: it is compared against
                ``len(section)``, i.e. it is measured in characters.

        Returns:
            A list of chunk strings in document order.
        """
        chunks = []
        for section in self.divide_into_sections(text):
            if len(section) > max_tokens:
                chunks.extend(self.split_section(section, max_tokens))
            else:
                chunks.append(section)
        return chunks

    def split_section(self, section, max_tokens):
        """Split *section* into pieces of at most *max_tokens* characters.

        Cuts are made at the last whitespace character inside each window;
        if a window contains no whitespace the text is cut at exactly
        *max_tokens* characters.

        Raises:
            ValueError: If *max_tokens* is not positive.
        """
        if max_tokens <= 0:
            raise ValueError("max_tokens must be a positive integer")

        pieces = []
        remaining = section.strip()
        while len(remaining) > max_tokens:
            # Search backwards from the window edge for a whitespace cut point.
            cut = next(
                (i for i in range(max_tokens, 0, -1) if remaining[i].isspace()),
                max_tokens,
            )
            piece = remaining[:cut].strip()
            if piece:
                pieces.append(piece)
            remaining = remaining[cut:].lstrip()
        if remaining:
            pieces.append(remaining)
        return pieces

    def divide_into_sections(self, text):
        """Split *text* into sections, each starting at a ``#``-style heading.

        Any text before the first heading is kept as its own section;
        whitespace-only fragments are discarded.
        """
        import re

        # The capturing group makes re.split keep each "heading + body" match.
        sections = re.split(r'(#{1,6}\s+.*?)(?=\n#{1,6}|\Z)', text, flags=re.DOTALL)
        return [s for s in sections if s.strip()]
3. 임베딩 모델 선택 및 비교
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
class EmbeddingBenchmark:
    """Compare several sentence-embedding models on the same texts and queries."""

    def __init__(self):
        # Every model is instantiated eagerly, in this order.
        model_names = (
            "all-MiniLM-L6-v2",
            "all-mpnet-base-v2",
            "multi-qa-MiniLM-L6-v2",
        )
        self.models = {name: SentenceTransformer(name) for name in model_names}

    def compare_models(self, texts, queries):
        """Compute query-to-text cosine similarity statistics for each model.

        Returns:
            A dict keyed by model name. Each value holds the mean and standard
            deviation over the whole similarity matrix, plus the per-query
            maximum under the key ``"top_k_similarities"``.
        """

        def summarize(model):
            # Encode the texts first, then the queries.
            text_vectors = model.encode(texts)
            query_vectors = model.encode(queries)
            # Rows correspond to queries, columns to texts.
            matrix = cosine_similarity(query_vectors, text_vectors)
            return {
                "mean_similarity": np.mean(matrix),
                "std_similarity": np.std(matrix),
                "top_k_similarities": np.max(matrix, axis=1),
            }

        return {name: summarize(model) for name, model in self.models.items()}
# Usage example: score two sample documents against two sample queries.
texts = [
    "Python은 인기 있는 프로그래밍 언어입니다.",
    "JavaScript는 웹 개발에 사용됩니다.",
]
queries = [
    "Python에 대해 설명해 주세요.",
    "웹 개발 언어는 무엇인가요?",
]
benchmark = EmbeddingBenchmark()
results = benchmark.compare_models(texts=texts, queries=queries)
print(results)
4. 벡터 데이터베이스 비교
4.1 Chroma (로컬)
import chromadb
from chromadb.utils import embedding_functions
class ChromaVectorDB:
    """Thin wrapper around a Chroma collection using its default embedding function."""

    def __init__(self, collection_name="rag_collection"):
        default_embedder = embedding_functions.DefaultEmbeddingFunction()
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=default_embedder,
        )

    def add_documents(self, documents, ids):
        """Add *documents* to the collection under the given *ids*."""
        payload = {"documents": documents, "ids": ids}
        self.collection.add(**payload)

    def search(self, query, k=5):
        """Return the documents of the first result list for the text *query*.

        Args:
            query: Query text; it is passed to the collection as a
                single-element ``query_texts`` list.
            k: Number of results requested.
        """
        response = self.collection.query(query_texts=[query], n_results=k)
        # One query was sent, so only the first result list is relevant.
        first_query_documents = response['documents'][0]
        return first_query_documents
4.2 Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Thin wrapper around a Qdrant collection configured for cosine distance."""

    def __init__(self, host="localhost", port=6333,
                 collection_name="rag_collection", vector_size=384):
        """Connect to Qdrant and create the collection if it does not exist.

        Args:
            host: Qdrant host.
            port: Qdrant port.
            collection_name: Collection to use.
            vector_size: Dimensionality used when the collection is created.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name
        if not self.client.collection_exists(collection_name=self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=vector_size, distance="Cosine"),
            )

    @staticmethod
    def _as_list(vector):
        """Convert array-like vectors (anything with ``.tolist()``) to plain lists."""
        return vector.tolist() if hasattr(vector, "tolist") else vector

    def add_documents(self, documents, ids):
        """Upsert *documents* using the caller-supplied *ids*.

        Each document must expose ``.embedding`` and ``.content``.

        Args:
            documents: Documents to store.
            ids: One point id per document, in the same order.
                NOTE(review): Qdrant is understood to accept only unsigned
                integers or UUIDs as point ids - confirm against the
                deployed version.

        Raises:
            ValueError: If *documents* and *ids* differ in length.
        """
        documents = list(documents)
        ids = list(ids)
        if len(documents) != len(ids):
            raise ValueError(
                f"documents and ids must have the same length "
                f"({len(documents)} != {len(ids)})"
            )
        if not documents:
            return

        # Using the supplied ids (not positional indices) keeps successive
        # batches from overwriting each other's points.
        points = [
            {
                "id": point_id,
                "vector": self._as_list(doc.embedding),
                "payload": {"content": doc.content},
            }
            for point_id, doc in zip(ids, documents)
        ]
        self.client.upsert(collection_name=self.collection_name, points=points)

    def search(self, query_vector, k=5):
        """Return the ``content`` payload of the *k* nearest points."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k,
        )
        return [hit.payload['content'] for hit in results]
4.3 pgvector (PostgreSQL 확장)
import psycopg2
import numpy as np
class PGVectorDB:
    """Vector store backed by PostgreSQL with the pgvector extension."""

    def __init__(self, connection_string):
        self.conn = psycopg2.connect(connection_string)
        self.setup_table()

    @staticmethod
    def _as_list(vector):
        """Return *vector* as a plain list (accepts arrays with ``.tolist()`` or any iterable)."""
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    def setup_table(self):
        """Create the extension, table and cosine index if they do not exist.

        The transaction is rolled back and the error re-raised on failure so
        the connection is not left in an aborted state.
        """
        try:
            with self.conn.cursor() as cur:
                # The VECTOR column type is provided by the pgvector extension.
                cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS rag_documents (
                        id SERIAL PRIMARY KEY,
                        content TEXT,
                        embedding VECTOR(384)
                    )
                """)
                # NOTE(review): pgvector recommends building ivfflat indexes
                # after the table holds data - confirm for this workload.
                cur.execute("""
                    CREATE INDEX IF NOT EXISTS idx_embedding ON rag_documents
                    USING ivfflat (embedding vector_cosine_ops)
                """)
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise

    def add_documents(self, documents):
        """Insert *documents* (each exposing ``.content`` and ``.embedding``).

        All rows are committed together; on failure the transaction is rolled
        back and the error re-raised.
        """
        try:
            with self.conn.cursor() as cur:
                for doc in documents:
                    cur.execute(
                        "INSERT INTO rag_documents (content, embedding) VALUES (%s, %s)",
                        (doc.content, self._as_list(doc.embedding)),
                    )
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise

    def search(self, query_vector, k=5):
        """Return the contents of the *k* rows nearest to *query_vector* (cosine distance).

        Args:
            query_vector: Query embedding; a numpy array or any iterable of numbers.
            k: Maximum number of rows to return.
        """
        try:
            with self.conn.cursor() as cur:
                # psycopg2 adapts a Python list to a SQL ARRAY; the explicit
                # ::vector cast is required because `<=>` is not defined
                # between a vector and an array.
                cur.execute("""
                    SELECT content FROM rag_documents
                    ORDER BY embedding <=> %s::vector
                    LIMIT %s
                """, (self._as_list(query_vector), k))
                return [row[0] for row in cur.fetchall()]
        except Exception:
            self.conn.rollback()
            raise
5. 완전한 RAG 파이프라인 구현
python
import os
from typing import List, Dict, Any
from
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Top comments (0)