RAG 시스템 실전 구축 (v42)

#ai #llm #developers #tutorial

RAG 시스템 실전 구축 (v42)

실제로 구축할 수 있는 RAG 시스템 구현 가이드

1. RAG 시스템 기본 구조

RAG(Retrieval-Augmented Generation, 검색 증강 생성) 시스템은 다음 세 가지 단계로 구성됩니다:

검색(Retrieval): 사용자 질문과 유사한 문서를 벡터 데이터베이스에서 찾음
보완(Augmentation): 검색된 문서를 프롬프트에 추가하여 컨텍스트 제공
생성(Generation): LLM이 답변 생성

# 기본 RAG 흐름
class BasicRAG:
    """Minimal retrieve-then-generate pipeline.

    The class only wires together three collaborators that are supplied by
    the caller:

    * ``embedding_model`` -- must expose ``encode(query)``.
    * ``vector_db`` -- must expose ``search(query_vector, k=...)``.
    * ``llm`` -- must expose ``generate(prompt)``.
    """

    def __init__(self, embedding_model, vector_db, llm=None):
        """Store the collaborators.

        Args:
            embedding_model: Object used to turn a query into a vector.
            vector_db: Object used to look up documents for a query vector.
            llm: Object used to produce the final answer. Optional so that
                existing two-argument callers keep working; it may also be
                assigned later via ``instance.llm = ...``.
        """
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        # Previously this attribute was never set, so generate() always
        # failed with AttributeError.
        self.llm = llm

    def retrieve(self, query, k=5):
        """Return the ``k`` search results for ``query`` from the vector DB.

        Args:
            query: The user question.
            k: Number of results requested from ``vector_db.search``.
        """
        query_vector = self.embedding_model.encode(query)
        return self.vector_db.search(query_vector, k=k)

    def generate(self, query, retrieved_docs):
        """Build the prompt from the query and documents and call the LLM.

        Args:
            query: The user question.
            retrieved_docs: Whatever ``retrieve`` returned; it is formatted
                into the prompt with its default string representation.

        Raises:
            RuntimeError: If no LLM has been configured.
        """
        if self.llm is None:
            raise RuntimeError(
                "BasicRAG.generate() needs an LLM; pass llm=... to the constructor"
            )
        prompt = f"질문: {query}\n참고 문서: {retrieved_docs}"
        return self.llm.generate(prompt)

2. 청킹 전략 (Chunking Strategies)

2.1 의미 기반 청킹 (Semantic Chunking)

문장 단위로 문서를 분할한 뒤 길이 제한에 맞춰 묶어 의미적 일관성 유지 (아래 예시는 임베딩 유사도를 사용하지 않는 단순화된 구현):

from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticChunker:
    """Sentence-based chunker that packs sentences into size-bounded chunks.

    The constructor loads a SentenceTransformer model into ``self.model``;
    ``chunk_semantically`` itself does not use it -- splitting is done on
    the literal ``'. '`` separator and chunks are sized by character count.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def chunk_semantically(self, text, min_chunk_size=100, max_chunk_size=500):
        """Split ``text`` into chunks made of whole sentences.

        Sentences are accumulated until adding the next one would push the
        chunk past ``max_chunk_size`` characters. A chunk is only closed once
        it has reached ``min_chunk_size``; a shorter chunk keeps growing
        instead of being thrown away, so no input text is ever lost (at the
        cost of occasionally exceeding ``max_chunk_size``).

        Args:
            text: The document to split.
            min_chunk_size: Minimum length a chunk must have before it may
                be closed.
            max_chunk_size: Target upper bound for the chunk length.

        Returns:
            A list of chunk strings; empty if ``text`` has no content.
        """
        parts = text.split('. ')
        last_index = len(parts) - 1

        sentences = []
        for index, part in enumerate(parts):
            piece = part.strip()
            if not piece:
                continue
            # split() removed the period of every piece except the last one;
            # the last piece still carries whatever punctuation it had, so
            # adding another period there would produce "..".
            if index < last_index:
                piece += '.'
            sentences.append(piece)

        chunks = []
        current = []
        current_len = 0
        for sentence in sentences:
            joiner = 1 if current else 0  # the space used when joining
            would_overflow = current_len + joiner + len(sentence) > max_chunk_size
            if current and would_overflow and current_len >= min_chunk_size:
                chunks.append(' '.join(current))
                current = []
                current_len = 0
                joiner = 0
            current.append(sentence)
            current_len += joiner + len(sentence)

        if current:
            chunks.append(' '.join(current))
        return chunks

# Usage example: build a chunker with the default model name and split a
# two-sentence sample text, then print the resulting list of chunks.
chunker = SemanticChunker()
text = "AI 기술은 빠르게 발전하고 있습니다. 특히 자연어 처리(NLP) 분야에서는 많은 혁신이 일어나고 있습니다."
chunks = chunker.chunk_semantically(text)
print(chunks)

2.2 재귀적 청킹 (Recursive Chunking)

고정 크기 윈도우를 일정 부분 겹치게(overlap) 이동시키며 문서를 분할하여 문맥을 유지:

class RecursiveChunker:
    """Fixed-size sliding-window chunker with overlapping windows."""

    def __init__(self, chunk_size=500, overlap=50):
        """Configure the window.

        Args:
            chunk_size: Maximum number of characters per chunk.
            overlap: Number of characters shared by consecutive chunks.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is
                not in ``[0, chunk_size)``.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
        self.chunk_size = chunk_size
        self.overlap = overlap

    def recursive_chunk(self, text):
        """Return consecutive windows of ``text``.

        Each window is at most ``chunk_size`` characters long and starts
        ``chunk_size - overlap`` characters after the previous one.

        Args:
            text: The string to split.

        Returns:
            A list of substrings; empty when ``text`` is empty.

        Raises:
            ValueError: If the attributes were changed after construction so
                that the window can no longer advance.
        """
        step = self.chunk_size - self.overlap
        if step <= 0:
            raise ValueError("overlap must be smaller than chunk_size")

        chunks = []
        text_len = len(text)
        start = 0
        while start < text_len:
            end = min(start + self.chunk_size, text_len)
            chunks.append(text[start:end])
            # Stop once a window reaches the end of the text. Stepping back
            # by `overlap` from here would land before the end again and
            # re-emit the same tail forever.
            if end == text_len:
                break
            start += step

        return chunks

# Example: chunk a short sample with a 200-character window and the default
# overlap, then print the resulting list of chunks.
recursive_chunker = RecursiveChunker(chunk_size=200)
text = "대규모 언어 모델은 자연어 이해와 생성 능력을 갖추고 있습니다. 이러한 모델은 다양한 응용 프로그램에서 활용됩니다."
chunks = recursive_chunker.recursive_chunk(text)
print(chunks)

2.3 에이전트 기반 청킹 (Agentic Chunking)

문서의 주제와 키워드를 기반으로 청킹:

class AgenticChunker:
    """Label paragraphs with a topic using simple keyword matching."""

    # Built-in topics, in matching priority order.
    DEFAULT_TOPIC_KEYWORDS = {
        'AI': ['artificial intelligence', 'machine learning', 'deep learning'],
        'Data': ['database', 'data science', 'analytics'],
    }

    def __init__(self, topic_keywords=None):
        """Set up the topic table.

        Args:
            topic_keywords: Optional mapping of topic name to a list of
                keywords. Defaults to the built-in ``AI`` / ``Data`` table.
        """
        source = self.DEFAULT_TOPIC_KEYWORDS if topic_keywords is None else topic_keywords
        # Copy so that editing one instance's table never affects another.
        self.topic_keywords = {topic: list(words) for topic, words in source.items()}

    def chunk_by_topic(self, text):
        """Split ``text`` into paragraphs and tag each with its first matching topic.

        Paragraphs are separated by a blank line (``'\\n\\n'``). Matching is
        a case-insensitive substring test. Topics are tried in the insertion
        order of ``self.topic_keywords`` and the first hit wins. Paragraphs
        that match no topic are omitted from the result.

        Args:
            text: The document to split.

        Returns:
            A list of ``(topic, paragraph)`` tuples in document order.
        """
        chunks = []
        for para in text.split('\n\n'):
            lowered = para.lower()  # computed once per paragraph
            for topic, keywords in self.topic_keywords.items():
                if any(keyword.lower() in lowered for keyword in keywords):
                    chunks.append((topic, para))
                    break
        return chunks

3. 임베딩 모델 선택과 비교

3.1 다양한 임베딩 모델 비교

import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import numpy as np

class EmbeddingBenchmark:
    """Compare embedding models by output shape, memory footprint and encode time."""

    # Models loaded when the caller does not supply its own list.
    DEFAULT_MODEL_NAMES = (
        'all-MiniLM-L6-v2',
        'all-mpnet-base-v2',
        'sentence-t5-3b',
        'bert-base-nli-mean-tokens',
    )

    def __init__(self, model_names=None):
        """Load every model to benchmark.

        Args:
            model_names: Optional iterable of SentenceTransformer model
                names. Defaults to ``DEFAULT_MODEL_NAMES``.
        """
        names = self.DEFAULT_MODEL_NAMES if model_names is None else model_names
        self.models = {name: SentenceTransformer(name) for name in names}

    def benchmark_models(self, texts):
        """Encode ``texts`` with every loaded model and collect statistics.

        Args:
            texts: The inputs handed to each model's ``encode``.

        Returns:
            A dict keyed by model name; each value holds ``shape`` (the
            shape of the returned array), ``memory_usage`` (array size in
            MB) and ``avg_time`` (seconds per ``encode`` call).
        """
        results = {}
        for name, model in self.models.items():
            # This first encode also runs before the timed one below.
            embeddings = model.encode(texts)
            results[name] = {
                'shape': embeddings.shape,
                'memory_usage': embeddings.nbytes / 1024 / 1024,  # MB
                'avg_time': self._time_encoding(model, texts),
            }
        return results

    def _time_encoding(self, model, texts, repeats=1):
        """Return the mean wall-clock seconds of ``model.encode(texts)``.

        Args:
            model: Object exposing ``encode``.
            texts: The inputs to encode.
            repeats: How many calls to average over.

        Raises:
            ValueError: If ``repeats`` is smaller than 1.
        """
        import time

        if repeats < 1:
            raise ValueError("repeats must be at least 1")

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        for _ in range(repeats):
            model.encode(texts)
        return (time.perf_counter() - start) / repeats

# Performance comparison: benchmark every loaded model on two sample
# sentences and print memory usage (MB) and encode time (s) per model.
benchmark = EmbeddingBenchmark()
texts = ["이 문장은 테스트 문장입니다.", "다른 문장입니다."]
results = benchmark.benchmark_models(texts)
for model_name, stats in results.items():
    print(f"{model_name}: {stats['memory_usage']:.2f}MB, {stats['avg_time']:.4f}s")

3.2 최적의 모델 선택

class OptimalEmbeddingSelector:
    """Evaluate embedding models and remember their scores."""

    def __init__(self):
        # Maps model name -> score returned by the last evaluate_model call.
        self.benchmark_results = {}

    def evaluate_model(self, model_name, texts, eval_dataset):
        """Evaluate one model and record its score.

        Args:
            model_name: SentenceTransformer model name to load.
            texts: Inputs to encode with the model.
            eval_dataset: Passed through to the quality evaluation.

        Returns:
            The score produced by ``_evaluate_retrieval_quality``.
        """
        model = SentenceTransformer(model_name)
        embeddings = model.encode(texts)

        # Example: retrieval accuracy evaluation.
        accuracy = self._evaluate_retrieval_quality(embeddings, eval_dataset)
        # Store the score so best_model() can compare models later;
        # previously benchmark_results was never written.
        self.benchmark_results[model_name] = accuracy
        return accuracy

    def best_model(self):
        """Return the name of the highest-scoring model, or None if nothing was evaluated."""
        if not self.benchmark_results:
            return None
        return max(self.benchmark_results, key=self.benchmark_results.get)

    def _evaluate_retrieval_quality(self, embeddings, eval_dataset):
        """Placeholder scorer: ignores its arguments and returns a constant."""
        # Simplified example; a real implementation would compare against
        # ground-truth labels.
        return 0.85

# Usage example: the selector is created here but the model name below is
# assigned by hand.
selector = OptimalEmbeddingSelector()
best_model = 'all-MiniLM-L6-v2'  # decide based on actual experiment results

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

import chromadb
from chromadb.config import Settings

class ChromaVectorDB:
    """Thin wrapper around a Chroma collection that embeds text itself."""

    # Embedding model, created on first use and then reused. Kept as a
    # class-level default so the attribute always exists.
    _model = None

    def __init__(self, collection_name='rag_collection'):
        """Create the client and get or create the named collection."""
        self.client = chromadb.Client(Settings(allow_reset=True))
        self.collection = self.client.get_or_create_collection(collection_name)

    def add_documents(self, documents, ids):
        """Embed ``documents`` and add them to the collection under ``ids``.

        Args:
            documents: List of document strings.
            ids: List of identifiers, passed to the collection unchanged.
        """
        if not documents:
            # Nothing to embed or store.
            return
        embeddings = self._get_embeddings(documents)
        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=ids,
        )

    def search(self, query, k=5):
        """Return the documents of the ``k`` nearest results for ``query``.

        Args:
            query: The query string.
            k: Number of results to request.

        Returns:
            The document list for the single query, i.e.
            ``results['documents'][0]``.
        """
        query_embedding = self._get_embeddings([query])[0]
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k,
        )
        return results['documents'][0]

    def _get_embeddings(self, texts):
        """Encode ``texts`` and return the vectors as plain Python lists."""
        if self._model is None:
            # Load once per instance instead of on every call; constructing
            # the model is the expensive part of embedding.
            from sentence_transformers import SentenceTransformer
            self._model = SentenceTransformer('all-MiniLM-L6-v2')
        vectors = self._model.encode(texts)
        # Hand Chroma plain lists rather than array objects.
        return vectors.tolist() if hasattr(vectors, 'tolist') else vectors

4.2 Qdrant 비교


python
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    def __init__(self, host='localhost', port=6333, collection_name='rag_collection',
                 vector_size=384, distance='Cosine', recreate=True):
        """Connect to Qdrant and prepare the collection.

        Args:
            host: Qdrant server host.
            port: Qdrant server port.
            collection_name: Name of the collection to use.
            vector_size: Dimensionality of the stored vectors; must match
                the embedding model's output size.
            distance: Distance metric name, e.g. ``'Cosine'``.
            recreate: When True (the default, matching the previous
                behavior) the collection is dropped and recreated, which
                DELETES any existing data. Pass False to keep an existing
                collection and only create it when it is missing.
        """
        from qdrant_client.models import Distance, VectorParams

        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

        # Use the typed config object: a plain dict in this position is
        # treated by the client API as a mapping of vector name -> params.
        vectors_config = VectorParams(size=vector_size, distance=Distance(distance))

        # Create the collection.
        if recreate:
            self.client.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=vectors_config,
            )
        elif not self.client.collection_exists(self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=vectors_config,
            )

    def add_documents(self, documents, ids):
        points = [
            {
                "id": i,
                "vector": self._get_embeddings([doc])[0],
                "payload": {"text": doc}
            }
            for i, doc in enumerate(documents)
        ]
        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

    def search(self, query, k=5):
        query_vector = self._get_embeddings([query])[0]
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k
        )
        return [hit.payload['text'] for hit in results]

    def _get_embeddings(self, texts):
        from sentence_transformers import SentenceTransformer

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)

Top comments (1)

Some comments may only be visible to logged-in visitors. Sign in to view all comments.