matias yoon

Posted on May 24

RAG 시스템 실전 구축 (v2)

#ai #llm #developers #tutorial

RAG 시스템 실전 구축 (v2)

1. RAG 시스템 개요

RAG (Retrieval-Augmented Generation) 시스템은 정보 검색과 생성을 결합하여 정확한 답변을 제공하는 아키텍처입니다. 다음 세 단계로 구성됩니다:

Retrieval: 사용자 질문과 유사한 문서 검색
Augmentation: 검색된 문서를 프롬프트에 추가
Generation: LLM이 검색된 정보를 기반으로 답변 생성

# RAG process flow
class RAGPipeline:
    """Chain a retriever and a generator into a retrieve-augment-generate flow.

    The retriever must expose ``retrieve(query)`` and the generator
    ``generate(prompt)``; each retrieved document must expose ``.content``.
    """

    def __init__(self, retriever, generator):
        self.retriever, self.generator = retriever, generator

    def process_query(self, query):
        """Return the generator's answer for *query*."""
        # Step 1: retrieval.
        hits = self.retriever.retrieve(query)
        # Step 2: augmentation, then step 3: generation.
        return self.generator.generate(self._augment_prompt(query, hits))

    def _augment_prompt(self, query, docs):
        """Build the prompt: the question followed by the joined documents."""
        joined = "\n\n".join(item.content for item in docs)
        return f"질문: {query}\n\n참조 문서:\n{joined}"

2. Chunking 전략

문서를 적절한 크기로 분할하는 것이 중요합니다. 다음 두 가지 전략을 비교합니다:

2.1 Semantic Chunking

의미 기반으로 문단을 나누어 의미 단위를 유지합니다.

from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def chunk_documents(self, text, threshold=0.75):
        """Split *text* into chunks of consecutive, similar sentences.

        Sentences are obtained by splitting on ``'. '``. Each sentence is
        compared (cosine similarity) against the FIRST sentence of the chunk
        currently being built; when the similarity is not above *threshold*
        a new chunk is started with that sentence as the new anchor.

        Args:
            text: The raw text to chunk.
            threshold: Minimum cosine similarity to stay in the current chunk.

        Returns:
            A list of chunk strings (sentences re-joined with ``'. '``).
            Empty or whitespace-only input yields an empty list.
        """
        # Drop blank fragments so the encoder never sees empty strings.
        sentences = [s for s in text.split('. ') if s.strip()]
        if not sentences:
            return []

        embeddings = self.model.encode(sentences)
        chunks = []
        current_chunk = [sentences[0]]
        anchor = embeddings[0]
        anchor_norm = np.linalg.norm(anchor)

        for sentence, emb in zip(sentences[1:], embeddings[1:]):
            emb_norm = np.linalg.norm(emb)
            denom = anchor_norm * emb_norm
            # A zero-norm vector has no direction; treat it as dissimilar
            # instead of dividing by zero.
            similarity = np.dot(anchor, emb) / denom if denom else 0.0
            if similarity > threshold:
                current_chunk.append(sentence)
            else:
                chunks.append('. '.join(current_chunk))
                current_chunk = [sentence]
                anchor, anchor_norm = emb, emb_norm

        chunks.append('. '.join(current_chunk))
        return chunks

2.2 Recursive Chunking

문서를 여러 수준으로 재귀적으로 분할합니다.

class RecursiveChunker:
    """Split text by recursive halving near whitespace boundaries."""

    def __init__(self, chunk_size=512, overlap=50):
        # chunk_size: target maximum chunk length in characters.
        # overlap: half-width of the window around the midpoint in which a
        # space is searched for to use as the split position.
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text, max_depth=3):
        """Return *text* split into chunks, in original order.

        Concatenating the returned chunks reproduces *text* exactly. Chunks
        are at most ``chunk_size`` characters long unless the recursion limit
        *max_depth* is reached first (each level halves the text, so up to
        ``2 ** max_depth`` chunks can be produced).
        """
        chunks = []
        self._recursive_split(text, chunks, 0, max_depth)
        return chunks

    def _recursive_split(self, text, chunks, depth, max_depth):
        """Append the pieces of *text* to *chunks*, halving while too long."""
        # len(text) < 2 cannot be split into two non-empty halves.
        if len(text) <= self.chunk_size or depth >= max_depth or len(text) < 2:
            chunks.append(text)
            return

        # Look for a space near the midpoint. The window start is clamped to
        # 1 so a negative start cannot wrap around to the end of the string
        # and so the left half is never empty.
        mid = len(text) // 2
        window_start = max(mid - self.overlap, 1)
        split_point = text.rfind(' ', window_start, mid + self.overlap)
        if split_point == -1:
            split_point = mid

        # Recurse into BOTH halves so the left half is also size-checked.
        self._recursive_split(text[:split_point], chunks, depth + 1, max_depth)
        self._recursive_split(text[split_point:], chunks, depth + 1, max_depth)

3. 임베딩 모델 선택

모델 성능과 속도를 고려하여 적절한 모델을 선택합니다:

from sentence_transformers import SentenceTransformer
import torch
import time

class EmbeddingBenchmark:
    """Time how long several sentence-transformers models take to encode text."""

    def __init__(self):
        # Model name -> short human-readable description.
        self.models = {
            'all-MiniLM-L6-v2': 'small, fast',
            'all-mpnet-base-v2': 'medium, balanced',
            'sentence-t5-xxl': 'large, high quality'
        }

    def benchmark_models(self, texts):
        """Encode *texts* with every configured model and report timings.

        Args:
            texts: List of strings to encode.

        Returns:
            Dict keyed by model name; each value holds ``'time'`` (seconds
            spent in ``encode``; model loading is not included), ``'size'``
            (length of one embedding, or 0 when *texts* is empty) and
            ``'description'``.
        """
        results = {}
        for model_name, description in self.models.items():
            model = SentenceTransformer(model_name)

            # perf_counter is monotonic and high-resolution; time.time() can
            # jump when the system clock is adjusted.
            start_time = time.perf_counter()
            embeddings = model.encode(texts)
            elapsed = time.perf_counter() - start_time

            results[model_name] = {
                'time': elapsed,
                # Guard the empty-input case, where there is no first row.
                'size': len(embeddings[0]) if len(embeddings) else 0,
                'description': description
            }
        return results

# Usage example: encode three short Korean sample strings with every model
# listed in EmbeddingBenchmark.models and print the resulting timing dict.
# NOTE(review): these are top-level statements, so they run as soon as the
# module is executed or imported; presumably each model is downloaded on first
# use — confirm before running on a constrained machine.
benchmark = EmbeddingBenchmark()
texts = ["AI 기술 발전", "머신러닝 알고리즘", "데이터 분석"]
results = benchmark.benchmark_models(texts)
print(results)

4. Vector Database 비교

다음 세 가지 벡터 데이터베이스를 비교합니다:

4.1 Chroma

가장 간단한 로컬 테스트용

import chromadb
from chromadb.utils import embedding_functions

class ChromaVectorDB:
    """Minimal wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a Chroma client and get-or-create the named collection."""
        self.client = chromadb.Client()
        default_embedder = embedding_functions.DefaultEmbeddingFunction()
        self.collection = self.client.get_or_create_collection(
            name=collection_name, embedding_function=default_embedder
        )

    def add_documents(self, documents, ids):
        """Add *documents* to the collection under the matching *ids*."""
        self.collection.add(documents=documents, ids=ids)

    def search(self, query, top_k=5):
        """Return the documents Chroma reports for the single *query* text."""
        response = self.collection.query(query_texts=[query], n_results=top_k)
        # One query text was sent, so take the first (only) result list.
        documents_per_query = response['documents']
        return documents_per_query[0]

4.2 Qdrant

고성능, 클라우드 호환

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Minimal wrapper around a Qdrant client for cosine-similarity search."""

    def __init__(self, host="localhost", port=6333):
        self.client = QdrantClient(host=host, port=port)

    def create_collection(self, collection_name, vector_size=384):
        """(Re)create *collection_name* with one unnamed cosine vector.

        ``recreate_collection`` replaces any existing collection of the same
        name, so existing points are lost.

        Args:
            collection_name: Name of the collection to (re)create.
            vector_size: Dimensionality of the stored vectors.
        """
        # Local import keeps this snippet self-contained.
        from qdrant_client.models import Distance, VectorParams

        # A dict such as {"vector": {...}} would declare a NAMED vector called
        # "vector"; search() below sends an unnamed query vector, which only
        # matches a collection configured with a single unnamed VectorParams.
        self.client.recreate_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=vector_size,
                distance=Distance.COSINE,
            ),
        )

    def search(self, collection_name, query_vector, top_k=5):
        """Return the payloads of the *top_k* nearest points to *query_vector*."""
        results = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k
        )
        return [hit.payload for hit in results]

4.3 pgvector

PostgreSQL 확장, 데이터베이스 통합이 용이

import psycopg2
import numpy as np

class PGVectorDB:
    """Store and search 384-dimensional embeddings in PostgreSQL via pgvector.

    NOTE(review): assumes the ``vector`` extension is already installed in
    the target database; this class does not create it.
    """

    def __init__(self, connection_string):
        self.conn = psycopg2.connect(connection_string)
        self.create_table_if_not_exists()

    def create_table_if_not_exists(self):
        """Create the ``documents`` table when it does not exist yet."""
        with self.conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id UUID PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
            self.conn.commit()

    def search(self, query_embedding, top_k=5):
        """Return the *top_k* most similar documents by cosine similarity.

        Args:
            query_embedding: Sequence of numbers (list or numpy array).
            top_k: Maximum number of rows to return.

        Returns:
            List of ``(content, similarity)`` tuples, most similar first.
        """
        # Plain Python floats: psycopg2 cannot adapt numpy scalar types such
        # as float32, and a list is sent as an SQL array.
        vector = [float(x) for x in query_embedding]
        with self.conn.cursor() as cur:
            # `<=>` is pgvector's cosine-distance operator (`<->` is L2), so
            # 1 - distance is the cosine similarity. The ::vector cast turns
            # the array parameter into a vector. Ordering by the distance
            # expression itself (ascending) lets pgvector use an ANN index.
            cur.execute("""
                SELECT content, 1 - (embedding <=> %s::vector) AS similarity
                FROM documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (vector, vector, top_k))
            return cur.fetchall()

5. 전체 RAG 파이프라인 구현


python
import os
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
import numpy as np
from typing import List, Tuple

class CompleteRAG:
    def __init__(self, embedding_model="all-MiniLM-L6-v2",
                 host="localhost", port=6333, collection_name="rag_docs"):
        """Load the embedding model, connect to Qdrant and reset the collection.

        Args:
            embedding_model: sentence-transformers model name to load.
            host: Qdrant server host.
            port: Qdrant server port.
            collection_name: Name of the collection used by this pipeline.

        NOTE: construction calls ``initialize_vector_db()``, which recreates
        the collection, so any points already stored under *collection_name*
        are discarded.
        """
        # Load the embedding model.
        self.embedder = SentenceTransformer(embedding_model)

        # Connect to the vector database.
        self.vector_db = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

        # Create (or reset) the collection.
        self.initialize_vector_db()

    def initialize_vector_db(self):
        """(Re)create the collection with one unnamed cosine vector.

        The vector size is taken from the loaded embedding model so that a
        non-default model (for example a 768-dimensional one) still matches
        the collection. ``recreate_collection`` drops any existing collection
        of the same name.
        """
        # Local import keeps this method self-contained.
        from qdrant_client.models import Distance, VectorParams

        # A dict such as {"vector": {...}} would declare a NAMED vector called
        # "vector", which does not match points upserted with a plain
        # (unnamed) "vector" list.
        self.vector_db.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.embedder.get_sentence_embedding_dimension(),
                distance=Distance.COSINE,
            ),
        )

    def add_documents(self, documents: List[Tuple[str, str]]):
        """문서 추가"""
        ids = [doc[0] for doc in documents]
        contents = [doc[1] for doc in documents]

        # 임베딩 생성
        embeddings = self.embedder.encode(contents)

        # 벡터 DB에 저장
        self.vector_db.upsert(
            collection_name=self.collection_name,
            points=[
                {
                    "id": doc_id,
                    "vector": embedding.tolist(),
                    "payload": {"content": content}
                }
                for doc_id, content, embedding in zip(ids, contents, embeddings)
            ]
        )

    def retrieve(self, query: str, top_k: int = 5):
        """검색

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)

DEV Community

RAG 시스템 실전 구축 (v2)

RAG 시스템 실전 구축 (v2)

1. RAG 시스템 개요

2. Chunking 전략

2.1 Semantic Chunking

2.2 Recursive Chunking

3. 임베딩 모델 선택

4. Vector Database 비교

4.1 Chroma

4.2 Qdrant

4.3 pgvector

5. 전체 RAG 파이프라인 구현

Top comments (0)