DEV Community

matias yoon
matias yoon

Posted on

RAG 시스템 실전 구축 (v2)

RAG 시스템 실전 구축 (v2)

1. RAG 시스템 개요

RAG (Retrieval-Augmented Generation) 시스템은 정보 검색과 생성을 결합하여 정확한 답변을 제공하는 아키텍처입니다. 다음 세 단계로 구성됩니다:

  1. Retrieval: 사용자 질문과 유사한 문서 검색
  2. Augmentation: 검색된 문서를 프롬프트에 추가
  3. Generation: LLM이 검색된 정보를 기반으로 답변 생성
# RAG process flow
class RAGPipeline:
    """Chain a retriever and a generator into a retrieve/augment/generate flow."""

    def __init__(self, retriever, generator):
        """Keep references to the two collaborators.

        Args:
            retriever: Object whose ``retrieve(query)`` returns documents
                exposing a ``content`` attribute.
            generator: Object whose ``generate(prompt)`` returns the answer.
        """
        self.generator = generator
        self.retriever = retriever

    def process_query(self, query):
        """Return the generator's answer for ``query``.

        The query is sent to the retriever, the retrieved documents are
        merged into a prompt, and that prompt is handed to the generator.
        """
        hits = self.retriever.retrieve(query)            # 1. retrieval
        prompt = self._augment_prompt(query, hits)       # 2. augmentation
        return self.generator.generate(prompt)           # 3. generation

    def _augment_prompt(self, query, docs):
        """Build the prompt: the question followed by every document's content."""
        # Documents are separated by one blank line inside the prompt.
        joined = "\n\n".join(doc.content for doc in docs)
        return f"질문: {query}\n\n참조 문서:\n{joined}"
Enter fullscreen mode Exit fullscreen mode

2. Chunking 전략

문서를 적절한 크기로 분할하는 것이 중요합니다. 다음 두 가지 전략을 비교합니다:

2.1 Semantic Chunking

문장 임베딩 간 유사도를 기준으로 인접한 문장들을 하나의 청크로 묶어 의미 단위를 유지합니다.

from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-embedding model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def chunk_documents(self, text, threshold=0.75):
        """Split ``text`` into chunks of semantically similar sentences.

        The text is split on ``'. '``. Each sentence is compared, by cosine
        similarity, with the *first* sentence of the chunk currently being
        built: it joins that chunk when the similarity exceeds ``threshold``
        and otherwise starts a new chunk.

        Args:
            text: The document text to chunk.
            threshold: Similarity above which a sentence stays in the
                current chunk.

        Returns:
            A list of chunk strings, each made of its sentences re-joined
            with ``'. '``. Empty or whitespace-only input yields ``[]``.
        """
        # Nothing to embed: avoid encoding (and returning) an empty chunk.
        if not text or not text.strip():
            return []

        sentences = text.split('. ')
        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = []
        anchor = None  # embedding of the sentence that opened the current chunk

        for sentence, emb in zip(sentences, embeddings):
            if anchor is None:
                current_chunk.append(sentence)
                anchor = emb
                continue

            similarity = np.dot(anchor, emb) / (
                np.linalg.norm(anchor) * np.linalg.norm(emb)
            )
            if similarity > threshold:
                current_chunk.append(sentence)
            else:
                # Topic shift: close the chunk and re-anchor on this sentence.
                chunks.append('. '.join(current_chunk))
                current_chunk = [sentence]
                anchor = emb

        if current_chunk:
            chunks.append('. '.join(current_chunk))
        return chunks
Enter fullscreen mode Exit fullscreen mode

2.2 Recursive Chunking

문서를 여러 수준으로 재귀적으로 분할합니다.

class RecursiveChunker:
    """Split text by recursive bisection until pieces fit ``chunk_size``."""

    def __init__(self, chunk_size=512, overlap=50):
        """Configure the chunker.

        Args:
            chunk_size: Target maximum length (in characters) of a chunk.
            overlap: Half-width, in characters, of the window around the
                midpoint that is searched for a space to split on. Despite
                the name, emitted chunks do not share text.
        """
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text, max_depth=3):
        """Return the chunks of ``text`` in document order.

        Args:
            text: The text to split.
            max_depth: Maximum number of bisection levels. A piece that
                reaches this depth is emitted as-is even if it is still
                longer than ``chunk_size``.

        Returns:
            A list of strings whose concatenation equals ``text``.
        """
        chunks = []
        self._recursive_split(text, chunks, 0, max_depth)
        return chunks

    def _recursive_split(self, text, chunks, depth, max_depth):
        """Append the chunks of ``text`` to ``chunks``, bisecting as needed."""
        # len(text) < 2 cannot be bisected into two non-empty pieces.
        if len(text) <= self.chunk_size or depth >= max_depth or len(text) < 2:
            chunks.append(text)
            return

        # Look for a space near the midpoint. The window is clamped to
        # [1, len(text)) so a negative start cannot wrap around to the end of
        # the string and neither side of the split can be empty.
        mid = len(text) // 2
        window_start = max(mid - self.overlap, 1)
        window_end = min(mid + self.overlap, len(text))
        split_point = text.rfind(' ', window_start, window_end)
        if split_point == -1:
            split_point = mid

        # Recurse into BOTH halves; emitting the left half unchecked would
        # let it exceed chunk_size.
        self._recursive_split(text[:split_point], chunks, depth + 1, max_depth)
        self._recursive_split(text[split_point:], chunks, depth + 1, max_depth)
Enter fullscreen mode Exit fullscreen mode

3. 임베딩 모델 선택

모델 성능과 속도를 고려하여 적절한 모델을 선택합니다:

from sentence_transformers import SentenceTransformer
import torch
import time

class EmbeddingBenchmark:
    """Time how long several sentence-embedding models take to encode texts."""

    def __init__(self):
        """Register the models to benchmark, keyed by name with a short note."""
        self.models = {
            'all-MiniLM-L6-v2': 'small, fast',
            'all-mpnet-base-v2': 'medium, balanced',
            'sentence-t5-xxl': 'large, high quality'
        }

    def benchmark_models(self, texts):
        """Encode ``texts`` with every registered model and record timings.

        Only the ``encode`` call is timed; constructing the model happens
        before the clock starts.

        Args:
            texts: The strings to encode.

        Returns:
            A dict mapping model name to a dict with ``'time'`` (elapsed
            seconds), ``'size'`` (length of the first embedding, or 0 when
            no embeddings were produced) and ``'description'``.
        """
        results = {}
        for model_name, description in self.models.items():
            model = SentenceTransformer(model_name)

            # perf_counter is monotonic and high-resolution, unlike time.time,
            # which can jump when the system clock is adjusted.
            started = time.perf_counter()
            embeddings = model.encode(texts)
            elapsed = time.perf_counter() - started

            results[model_name] = {
                'time': elapsed,
                # Guard against empty input, where embeddings[0] would raise.
                'size': len(embeddings[0]) if len(embeddings) else 0,
                'description': description
            }
        return results

# Usage example: benchmark every registered model on three short phrases.
texts = ["AI 기술 발전", "머신러닝 알고리즘", "데이터 분석"]
benchmark = EmbeddingBenchmark()
results = benchmark.benchmark_models(texts)
print(results)
Enter fullscreen mode Exit fullscreen mode

4. Vector Database 비교

다음 세 가지 벡터 데이터베이스를 비교합니다:

4.1 Chroma

가장 간단한 로컬 테스트용

import chromadb
from chromadb.utils import embedding_functions

class ChromaVectorDB:
    """Small wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a Chroma client and open (or create) ``collection_name``.

        The collection is configured with Chroma's default embedding function.
        """
        self.client = chromadb.Client()
        default_embedder = embedding_functions.DefaultEmbeddingFunction()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=default_embedder,
        )

    def add_documents(self, documents, ids):
        """Add ``documents`` to the collection under the matching ``ids``."""
        self.collection.add(documents=documents, ids=ids)

    def search(self, query, top_k=5):
        """Return the documents matched for ``query`` (at most ``top_k``)."""
        response = self.collection.query(query_texts=[query], n_results=top_k)
        # One result list per query text; only one query was sent.
        documents_per_query = response['documents']
        return documents_per_query[0]
Enter fullscreen mode Exit fullscreen mode

4.2 Qdrant

고성능, 클라우드 호환

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Minimal wrapper around a Qdrant client: create a collection and search it."""

    def __init__(self, host="localhost", port=6333):
        """Create a client for the Qdrant server at ``host``:``port``."""
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, collection_name, vector_size=384):
        """(Re)create ``collection_name`` with one cosine-distance vector.

        NOTE: this goes through ``recreate_collection``, which per the Qdrant
        client documentation deletes an existing collection of the same name
        before creating the new one.

        Args:
            collection_name: Name of the collection to (re)create.
            vector_size: Dimensionality of the stored vectors.
        """
        # Local import so this class does not depend on edits to the
        # file-level import lines.
        from qdrant_client.models import Distance, VectorParams

        # A single *unnamed* vector. A mapping such as {"vector": {...}}
        # declares a named vector, which a plain ``query_vector`` list (as
        # sent by ``search`` below) would not address.
        self.client.recreate_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=vector_size,
                distance=Distance.COSINE,
            ),
        )

    def search(self, collection_name, query_vector, top_k=5):
        """Return the payloads of the ``top_k`` points nearest ``query_vector``.

        Args:
            collection_name: Collection to search.
            query_vector: The query embedding.
            top_k: Maximum number of hits to return.

        Returns:
            A list with the ``payload`` of every hit, in the order returned
            by the client.
        """
        hits = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k,
        )
        return [hit.payload for hit in hits]
Enter fullscreen mode Exit fullscreen mode

4.3 pgvector

PostgreSQL 확장, 데이터베이스 통합이 용이

import psycopg2
import numpy as np

class PGVectorDB:
    """Store and search document embeddings in PostgreSQL via pgvector."""

    def __init__(self, connection_string):
        """Connect to PostgreSQL and make sure the ``documents`` table exists.

        Args:
            connection_string: Connection string passed to ``psycopg2.connect``.
        """
        self.conn = psycopg2.connect(connection_string)
        self.create_table_if_not_exists()

    def create_table_if_not_exists(self):
        """Create the pgvector extension and the ``documents`` table if missing."""
        with self.conn.cursor() as cur:
            # The VECTOR column type only exists once the extension is enabled.
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id UUID PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
        self.conn.commit()

    @staticmethod
    def _to_vector_literal(embedding):
        """Format a sequence of numbers as pgvector text input, e.g. ``[0.5,1.0]``."""
        # float() also normalises numpy scalars, which psycopg2 cannot adapt.
        return "[" + ",".join(str(float(value)) for value in embedding) + "]"

    def search(self, query_embedding, top_k=5):
        """Return the ``top_k`` documents most similar to ``query_embedding``.

        Args:
            query_embedding: Sequence of numbers (list, tuple or array) with
                the same dimensionality as the ``embedding`` column.
            top_k: Maximum number of rows to return.

        Returns:
            A list of ``(content, similarity)`` rows, most similar first,
            where ``similarity`` is the cosine similarity.
        """
        vector_literal = self._to_vector_literal(query_embedding)
        with self.conn.cursor() as cur:
            # `<=>` is pgvector's cosine distance (`<->` is L2 distance), so
            # 1 - distance is a real cosine similarity. Ordering by the raw
            # distance expression ascending is the form pgvector's indexes
            # can serve.
            cur.execute("""
                SELECT content, 1 - (embedding <=> %s::vector) AS similarity
                FROM documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (vector_literal, vector_literal, top_k))
            return cur.fetchall()
Enter fullscreen mode Exit fullscreen mode

5. 전체 RAG 파이프라인 구현


python
import os
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
import numpy as np
from typing import List, Tuple

class CompleteRAG:
    def __init__(self, embedding_model="all-MiniLM-L6-v2", host="localhost",
                 port=6333, collection_name="rag_docs"):
        """Load the embedding model, connect to Qdrant and set up the collection.

        Args:
            embedding_model: Name of the SentenceTransformer model to load.
            host: Host of the Qdrant server.
            port: Port of the Qdrant server.
            collection_name: Name of the Qdrant collection used for documents.
        """
        # Load the embedding model
        self.embedder = SentenceTransformer(embedding_model)

        # Set up the vector DB client
        self.vector_db = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

        # Create the collection (must run after collection_name is set)
        self.initialize_vector_db()

    def initialize_vector_db(self):
        """(Re)create the Qdrant collection used to store document vectors.

        The vector size is taken from the loaded embedding model, so a model
        other than the 384-dimensional default still gets a matching
        collection.

        NOTE: ``recreate_collection`` deletes an existing collection of the
        same name before creating the new one, per the Qdrant client
        documentation.
        """
        # Local import so this method does not depend on edits to the
        # file-level import lines.
        from qdrant_client.models import Distance, VectorParams

        # A single *unnamed* vector: points are upserted with a plain
        # "vector" list, which does not address a named vector such as
        # {"vector": {...}}.
        self.vector_db.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.embedder.get_sentence_embedding_dimension(),
                distance=Distance.COSINE,
            ),
        )

    def add_documents(self, documents: List[Tuple[str, str]]):
        """Embed ``documents`` and upsert them into the vector DB.

        Args:
            documents: ``(id, content)`` pairs. Each content string is
                embedded and stored with payload ``{"content": content}``.
                NOTE(review): Qdrant restricts point IDs (unsigned integers
                or UUIDs, per its documentation) -- confirm callers pass
                compatible IDs.
        """
        # Nothing to embed or store; skip the model call and the request.
        if not documents:
            return

        ids = [doc[0] for doc in documents]
        contents = [doc[1] for doc in documents]

        # Create one embedding per content string
        embeddings = self.embedder.encode(contents)

        # Store the vectors together with their source text
        self.vector_db.upsert(
            collection_name=self.collection_name,
            points=[
                {
                    "id": doc_id,
                    "vector": embedding.tolist(),
                    "payload": {"content": content}
                }
                for doc_id, content, embedding in zip(ids, contents, embeddings)
            ]
        )

    def retrieve(self, query: str, top_k: int = 5):
        """검색

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)