DEV Community

matias yoon
matias yoon

Posted on

RAG 시스템 실전 구축 (v18)

RAG 시스템 실전 구축 (v18)

1. RAG의 핵심 개념

RAG(Retrieval-Augmented Generation)는 외부 문서에서 검색한 정보를 프롬프트에 함께 제공하여 대규모 언어 모델(LLM)의 답변 정확도를 높이는 아키텍처입니다. 다음과 같은 세 가지 단계로 작동합니다:

  1. Retrieval: 사용자 질문과 관련된 문서 조각을 검색합니다.
  2. Augmentation: 검색된 문서 조각을 질문과 결합하여 LLM에 전달할 컨텍스트(프롬프트)를 구성합니다.
  3. Generation: 증강된 컨텍스트를 기반으로 답변을 생성합니다.

이 루프는 다음과 같은 순서로 작동합니다:

# RAG 루프의 기본 구조
def rag_loop(query, vector_db, embedding_model, llm, top_k=5):
    """Run one retrieve -> augment -> generate pass for a user query.

    Args:
        query: The user question. It is passed unchanged to the vector
            store, to the augmentation step, and to the LLM.
        vector_db: Object exposing ``search(query, top_k=...)``.
        embedding_model: Accepted for interface compatibility. It is not
            used here because ``vector_db.search`` receives the raw query.
        llm: Object exposing ``generate(context, query)``.
        top_k: Number of documents requested from ``vector_db``. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        Whatever ``llm.generate`` returns.
    """
    # 1. Retrieval
    retrieved_docs = vector_db.search(query, top_k=top_k)

    # 2. Augmentation -- ``augment_context`` is not defined in this snippet
    #    and must be provided by the surrounding module.
    augmented_context = augment_context(query, retrieved_docs)

    # 3. Generation
    return llm.generate(augmented_context, query)
Enter fullscreen mode Exit fullscreen mode

2. Chunking 전략

2.1 Semantic Chunking

의미 기반으로 문서를 분할하여 의미 단위를 유지합니다:

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.cluster import KMeans

class SemanticChunker:
    """Merge adjacent sentences into chunks based on embedding clusters.

    Sentences are embedded, clustered with KMeans, and consecutive sentences
    that land in the same cluster are concatenated into one chunk.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model named *model_name*."""
        self.model = SentenceTransformer(model_name)

    def split_sentences(self, text):
        """Split *text* into stripped, non-empty sentences.

        A sentence ends at ``.``, ``!`` or ``?`` followed by whitespace, or
        at a line break. The terminator stays attached to its sentence.
        """
        # Local import: the import block next to this class does not bring
        # ``re`` into scope.
        import re

        fragments = re.split(r'(?<=[.!?])\s+|\n+', text)
        return [frag.strip() for frag in fragments if frag.strip()]

    def chunk_by_semantic_distance(self, text, threshold=0.7):
        """Return *text* split into chunks of semantically related sentences.

        Args:
            text: The document to chunk.
            threshold: Accepted for backward compatibility. The KMeans-based
                grouping below does not consult it.

        Returns:
            A list of chunk strings in document order. Empty input yields
            ``[]`` and a single sentence is returned as a single chunk.
        """
        sentences = self.split_sentences(text)
        # With fewer than two sentences there is nothing to cluster, and
        # ``len(sentences) // 2`` would request zero clusters from KMeans.
        if len(sentences) < 2:
            return sentences

        embeddings = self.model.encode(sentences)

        # Fixed seed and explicit n_init so the same text always produces
        # the same chunks.
        kmeans = KMeans(
            n_clusters=max(1, len(sentences) // 2),
            n_init=10,
            random_state=0,
        )
        clusters = kmeans.fit_predict(embeddings)

        # Start a new chunk whenever the cluster label changes between
        # neighbouring sentences; otherwise extend the current chunk.
        chunks = [sentences[0]]
        for previous, current, sentence in zip(clusters, clusters[1:], sentences[1:]):
            if current != previous:
                chunks.append(sentence)
            else:
                chunks[-1] += " " + sentence

        return chunks

# Usage example: build a chunker with the default model name and split a
# placeholder document string.
chunker = SemanticChunker()
chunks = chunker.chunk_by_semantic_distance("문서 내용...", threshold=0.7)
Enter fullscreen mode Exit fullscreen mode

2.2 Recursive Chunking

문단 → 문장 순으로, 큰 단위에서 작은 단위로 단계적으로 분할합니다:

import re

class RecursiveChunker:
    """Split text by paragraph, then by sentence, to respect a size limit.

    Paragraphs (separated by a blank line) that fit in ``chunk_size``
    characters are kept whole. Longer paragraphs are split into sentences
    that are packed greedily into chunks. A single sentence longer than
    ``chunk_size`` is cut into fixed-size pieces.
    """

    def __init__(self, chunk_size=512, overlap=50):
        """Store the chunking parameters.

        Args:
            chunk_size: Maximum chunk length in characters.
            overlap: Stored for API compatibility; ``chunk_recursive`` does
                not currently apply any overlap between chunks.
        """
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Return *text* as a list of chunks in document order.

        Blank paragraphs are skipped, so empty input yields ``[]``.
        """
        # Guard against a non-positive chunk_size, which would otherwise make
        # the fixed-size slicing in _pack_sentences loop forever.
        limit = max(1, self.chunk_size)
        chunks = []

        # Paragraph-level split
        for para in text.split('\n\n'):
            if not para.strip():
                continue
            if len(para) <= self.chunk_size:
                chunks.append(para)
            else:
                # Sentence-level split
                chunks.extend(self._pack_sentences(para, limit))

        return chunks

    def _pack_sentences(self, para, limit):
        """Greedily pack the sentences of *para* into chunks of <= *limit* chars."""
        packed = []
        current = ""

        # Split after the terminator so the original punctuation is kept,
        # rather than being replaced by a synthetic ". ".
        for sentence in re.split(r'(?<=[.!?])\s+', para.strip()):
            if not sentence:
                continue

            candidate = f"{current} {sentence}" if current else sentence
            if len(candidate) <= limit:
                current = candidate
                continue

            if current:
                packed.append(current)
            # A sentence that is too long on its own is cut into fixed-size
            # pieces so no chunk exceeds the limit.
            while len(sentence) > limit:
                packed.append(sentence[:limit])
                sentence = sentence[limit:]
            current = sentence

        if current:
            packed.append(current)
        return packed
Enter fullscreen mode Exit fullscreen mode

2.3 Agentic Chunking

자동으로 의미 있는 단위를 추출하는 방식입니다. 단, 아래 코드는 개념을 단순화한 예시로, 실제로는 단어를 chunk_size 한도 내에서 순서대로 묶기만 합니다:

class AgenticChunker:
    """Pack whitespace-separated words into chunks of bounded length.

    Despite the method name, no entity detection is performed. This is a
    simplified stand-in that groups words greedily.
    """

    def __init__(self, chunk_size=512):
        """Store *chunk_size*, the maximum chunk length in characters."""
        self.chunk_size = chunk_size

    def chunk_with_entity_detection(self, text):
        """Return *text* split into space-joined word groups.

        Each chunk's joined length stays within ``chunk_size``, except that
        a single word longer than ``chunk_size`` forms its own chunk. Empty
        or whitespace-only input yields ``[]``.
        """
        chunks = []
        current_words = []
        current_length = 0  # length of ' '.join(current_words)

        for word in text.split():
            # Length of the chunk if this word were appended, including the
            # separating space when the chunk is not empty.
            projected = current_length + len(word) + (1 if current_words else 0)

            # Flush only a non-empty chunk, so an oversized first word never
            # produces an empty chunk.
            if current_words and projected > self.chunk_size:
                chunks.append(' '.join(current_words))
                current_words = [word]
                current_length = len(word)
            else:
                current_words.append(word)
                current_length = projected

        if current_words:
            chunks.append(' '.join(current_words))

        return chunks
Enter fullscreen mode Exit fullscreen mode

3. 임베딩 모델 선택

3.1 모델 비교

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

class EmbeddingBenchmark:
    """Load several sentence-embedding models and compare them on sample text."""

    # Models loaded when the caller does not supply a list.
    DEFAULT_MODEL_NAMES = (
        'all-MiniLM-L6-v2',
        'all-mpnet-base-v2',
        'sentence-t5-3b',
    )

    def __init__(self, model_names=None):
        """Load every model eagerly into ``self.models`` keyed by name.

        Args:
            model_names: Optional iterable of model names to load. When
                omitted, ``DEFAULT_MODEL_NAMES`` is used.
        """
        names = self.DEFAULT_MODEL_NAMES if model_names is None else model_names
        self.models = {name: SentenceTransformer(name) for name in names}

    def benchmark_model(self, texts, model_name):
        """Return the embeddings of *texts* from the model *model_name*.

        Raises:
            KeyError: If *model_name* is not one of the loaded models.
        """
        return self.models[model_name].encode(texts)

    def compare_models(self, test_texts):
        """Encode *test_texts* with every loaded model and collect statistics.

        Returns:
            A dict mapping model name to a dict with ``'time'`` (seconds
            spent in ``encode``), ``'size'`` (number of embeddings) and
            ``'dimensions'`` (length of the first embedding, or 0 when no
            embeddings were produced).
        """
        # Local import: ``time`` is not in the import block next to this class.
        import time

        results = {}
        for name, model in self.models.items():
            # perf_counter is a monotonic clock intended for measuring
            # elapsed intervals.
            started = time.perf_counter()
            embeddings = model.encode(test_texts)
            elapsed = time.perf_counter() - started

            count = len(embeddings)
            results[name] = {
                'time': elapsed,
                'size': count,
                # Avoid indexing into an empty result.
                'dimensions': len(embeddings[0]) if count else 0,
            }
        return results

# Performance comparison: encode a small sample with every model held by the
# benchmark object and print the collected per-model statistics.
benchmark = EmbeddingBenchmark()
test_data = ["테스트 문장 1", "테스트 문장 2", "테스트 문장 3"]
results = benchmark.compare_models(test_data)
print(results)
Enter fullscreen mode Exit fullscreen mode

3.2 최적 모델 선택 가이드

  • all-MiniLM-L6-v2: 속도와 정확성의 균형
  • all-mpnet-base-v2: 더 높은 정확성
  • sentence-t5-3b: 대규모 문서 처리

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

import chromadb
from chromadb.config import Settings

class ChromaDBManager:
    """Thin wrapper around a persistent Chroma collection named "docs"."""

    def __init__(self, persist_directory="./chroma_db"):
        """Open (or create) the on-disk client and the "docs" collection."""
        self.client = chromadb.PersistentClient(path=persist_directory)
        self.collection = self.client.get_or_create_collection("docs")

    def add_documents(self, documents, embeddings, ids):
        """Add documents with caller-computed embeddings under the given ids."""
        self.collection.add(
            documents=documents,
            embeddings=embeddings,
            ids=ids
        )

    def search(self, query, top_k=5, query_embedding=None):
        """Return the ``top_k`` nearest documents and their distances.

        Args:
            query: Query text. Used only when *query_embedding* is omitted,
                in which case the collection embeds it itself.
            top_k: Number of results to request.
            query_embedding: Optional pre-computed query vector. Pass this
                when the stored embeddings were produced by your own model
                (see ``add_documents``) so the query and the documents live
                in the same vector space.

        Returns:
            A ``(documents, distances)`` tuple for the single query.
        """
        if query_embedding is None:
            results = self.collection.query(
                query_texts=[query],
                n_results=top_k
            )
        else:
            # Send only the vector, not the text, so the query is not
            # re-embedded by the collection.
            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=top_k
            )
        return results['documents'][0], results['distances'][0]
Enter fullscreen mode Exit fullscreen mode

4.2 Qdrant 비교

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue

class QdrantManager:
    """Small helper around a Qdrant client bound to the "rag_docs" collection."""

    def __init__(self, host="localhost", port=6333):
        """Connect to the Qdrant server at *host*:*port*."""
        self.collection_name = "rag_docs"
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, vector_size=384):
        """(Re)create the collection for cosine-distance vectors of *vector_size*.

        NOTE(review): ``recreate_collection`` presumably drops any existing
        collection of the same name first -- confirm before calling this
        against a populated store.
        """
        vector_settings = VectorParams(size=vector_size, distance="Cosine")
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=vector_settings,
        )

    def search(self, query_vector, top_k=5):
        """Return the client's search result for the *top_k* nearest points."""
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
Enter fullscreen mode Exit fullscreen mode

4.3 pgvector 비교

import psycopg2
from psycopg2.extras import Json
import numpy as np

class PGVectorManager:
    """Store and query document embeddings in PostgreSQL via pgvector.

    Can be used as a context manager so the cursor and connection are
    released when the block exits.
    """

    def __init__(self, connection_string):
        """Open a connection and a cursor using *connection_string*."""
        self.conn = psycopg2.connect(connection_string)
        self.cursor = self.conn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the cursor and the underlying connection."""
        self.cursor.close()
        self.conn.close()

    def create_table(self):
        """Create the ``documents`` table (and the vector extension) if missing."""
        # The VECTOR column type only exists once the extension is installed
        # in the database.
        self.cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id UUID PRIMARY KEY,
                content TEXT,
                embedding VECTOR(384)
            )
        """)
        self.conn.commit()

    def search(self, query_vector, top_k=5):
        """Return the *top_k* rows nearest to *query_vector*.

        Args:
            query_vector: Iterable of numbers (list, tuple, numpy array...).
            top_k: Maximum number of rows to return.

        Returns:
            A list of ``(content, embedding)`` rows, nearest first.
        """
        # Serialize to pgvector's text form ("[x,y,...]") and cast explicitly.
        # A bare Python list would be bound as a Postgres array, for which
        # the <-> operator is not defined, and numpy arrays cannot be adapted
        # by psycopg2 at all.
        vector_literal = "[" + ",".join(repr(float(v)) for v in query_vector) + "]"
        # NOTE(review): <-> is pgvector's Euclidean-distance operator; if
        # cosine ranking is wanted, <=> would be the operator -- confirm the
        # intended metric.
        self.cursor.execute("""
            SELECT content, embedding
            FROM documents
            ORDER BY embedding <-> %s::vector
            LIMIT %s
        """, (vector_literal, top_k))
        return self.cursor.fetchall()
Enter fullscreen mode Exit fullscreen mode

5. 전체 RAG 파이프라인 구현


python
import numpy as np
from sentence_transformers import SentenceTransformer
from chromadb import PersistentClient
import time

class CompleteRAGPipeline:
    def __init__(self, model_name='all-MiniLM-L6-v2', db_path="./rag_db",
                 collection_name="docs"):
        """Load the embedding model and open the persistent vector store.

        Args:
            model_name: Sentence-embedding model to load.
            db_path: Directory of the persistent Chroma store. Defaults to
                the previously hard-coded "./rag_db".
            collection_name: Collection to open or create. Defaults to the
                previously hard-coded "docs".
        """
        self.embedding_model = SentenceTransformer(model_name)
        self.vector_db = PersistentClient(path=db_path)
        self.collection = self.vector_db.get_or_create_collection(collection_name)

    def prepare_documents(self, documents):
        """Chunk every document and label each chunk "<doc index>-<chunk index>".

        Returns:
            A ``(chunks, ids)`` pair of parallel lists in document order.
        """
        splitter = RecursiveChunker(chunk_size=512)
        chunks, ids = [], []

        for doc_index, document in enumerate(documents):
            pieces = splitter.chunk_recursive(document)
            for chunk_index, piece in enumerate(pieces):
                ids.append(f"{doc_index}-{chunk_index}")
                chunks.append(piece)

        return chunks, ids

    def embed_and_store(self, documents):
        """문서 임베딩 및

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)