DEV Community

matias yoon
matias yoon

Posted on

RAG 시스템 실전 구축 (v25)

RAG 시스템 실전 구축 (v25)

1. RAG 기본 개념: 검색 → 보강 → 생성 루프

RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 LLM이 응답을 생성하기 전에 외부 지식 소스에서 질문과 관련된 정보를 검색하고, 그 결과를 프롬프트에 추가하여 생성을 보강하는 아키텍처입니다. 이 시스템은 다음과 같은 루프를 따릅니다:

사용자 질문 → 검색 → 보강 → 생성
Enter fullscreen mode Exit fullscreen mode

검색: 질문과 유사한 문서 조각을 벡터 데이터베이스에서 찾습니다.

보강: 검색된 문서와 질문을 함께 LLM 입력으로 제공합니다.

생성: LLM은 보강된 입력을 바탕으로 질문에 대한 응답을 생성합니다.

2. 청킹 전략: 의미적, 재귀적, 에이전트 기반

2.1 의미적 청킹 (Semantic Chunking)

import nltk
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer

class SemanticChunker:
    """Pack sentences into chunks, optionally breaking where the topic shifts.

    The text is split into sentences with ``sent_tokenize`` and the sentences
    are packed greedily, in order, into chunks of at most ``chunk_size``
    characters. When a similarity threshold is supplied, a chunk is also
    closed whenever two neighbouring sentences are less similar than that
    threshold.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

    def chunk_document(self, text, chunk_size=512, similarity_threshold=None):
        """Split ``text`` into sentence-aligned chunks.

        Args:
            text: Document text to split.
            chunk_size: Maximum chunk length in characters, counting the
                single spaces used to join sentences. A single sentence that
                is longer than this is emitted as its own oversized chunk,
                because sentences are never cut.
            similarity_threshold: Optional cosine-similarity cut-off. When
                given, a new chunk starts before any sentence whose embedding
                is less similar than this to the previous sentence. When
                ``None`` (the default) only the size limit applies and no
                embeddings are computed.

        Returns:
            A list of chunk strings in document order; empty for text that
            contains no sentences.
        """
        # Split into sentences.
        sentences = sent_tokenize(text)
        if not sentences:
            return []

        # Embeddings are only needed for topic-shift detection. With
        # normalized vectors the dot product is the cosine similarity.
        embeddings = None
        if similarity_threshold is not None:
            embeddings = self.model.encode(sentences, normalize_embeddings=True)

        chunks = []
        current_chunk = []
        current_length = 0

        for index, sentence in enumerate(sentences):
            # Length of the chunk if this sentence were appended, including
            # the joining space when the chunk already has content.
            joined_length = current_length + len(sentence) + (1 if current_chunk else 0)
            over_budget = joined_length > chunk_size
            topic_shift = (
                embeddings is not None
                and index > 0
                and float(embeddings[index - 1].dot(embeddings[index])) < similarity_threshold
            )

            # Only flush a non-empty buffer: a first sentence longer than
            # chunk_size must not produce a leading empty chunk.
            if current_chunk and (over_budget or topic_shift):
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                joined_length = len(sentence)

            current_chunk.append(sentence)
            current_length = joined_length

        if current_chunk:
            chunks.append(' '.join(current_chunk))

        return chunks
Enter fullscreen mode Exit fullscreen mode

2.2 재귀적 청킹 (Recursive Chunking)

import re

class RecursiveChunker:
    """Split text into paragraph-aligned chunks no longer than a size limit.

    Paragraphs (separated by a blank line) that fit within
    ``max_chunk_size`` are kept whole. Longer paragraphs are bisected near
    their midpoint, preferably at a space, and both halves are split again
    until every piece fits. Each right-hand piece starts a little before the
    split point so neighbouring chunks overlap.
    """

    # Half-width, in characters, of the window around the midpoint that is
    # searched for a space to split on.
    _SPLIT_WINDOW = 50

    def __init__(self, max_chunk_size=1024, overlap_ratio=0.1):
        """Store the chunking parameters.

        Args:
            max_chunk_size: Maximum chunk length in characters.
            overlap_ratio: Requested overlap between neighbouring pieces of a
                split paragraph, as a fraction of ``max_chunk_size``.
        """
        self.max_chunk_size = max_chunk_size
        self.overlap_ratio = overlap_ratio

    def chunk_recursive(self, text):
        """Return the chunks of ``text`` in document order.

        Args:
            text: Document text; paragraphs are separated by ``'\\n\\n'``.

        Returns:
            A list of chunk strings. Whitespace-only paragraphs and pieces
            are skipped, so empty input yields an empty list.
        """
        chunks = []

        # Split the document into paragraphs.
        for paragraph in text.split('\n\n'):
            if not paragraph.strip():
                # Runs of blank lines produce empty paragraphs; skip them.
                continue
            chunks.extend(
                piece for piece in self._recursive_split(paragraph) if piece.strip()
            )

        return chunks

    def _recursive_split(self, text):
        """Bisect ``text`` until every piece fits within the size limit."""
        # A single character cannot be split further, so never go below 1.
        limit = max(1, self.max_chunk_size)
        if len(text) <= limit:
            return [text]

        # Look for a space near the midpoint. The window is clamped to valid
        # indices because a negative start would be counted from the end of
        # the string; starting at 1 keeps the left piece non-empty.
        mid = len(text) // 2
        window_start = max(1, mid - self._SPLIT_WINDOW)
        window_end = min(len(text), mid + self._SPLIT_WINDOW)
        split_point = text.rfind(' ', window_start, window_end)
        if split_point == -1:
            split_point = mid

        # Cap the overlap at half of the left piece so the right piece is
        # always strictly shorter than ``text`` and the recursion terminates
        # even for large overlap ratios.
        requested_overlap = max(0, int(limit * self.overlap_ratio))
        overlap_size = min(requested_overlap, split_point // 2)

        left_chunk = text[:split_point]
        # Take the overlap as one contiguous slice so the space at the split
        # point is preserved between the overlap and the remainder.
        right_chunk = text[split_point - overlap_size:].lstrip()

        return self._recursive_split(left_chunk) + self._recursive_split(right_chunk)
Enter fullscreen mode Exit fullscreen mode

2.3 에이전트 기반 청킹 (Agent-based Chunking)

class AgentChunker:
    """Chunk a document with a selected strategy and attach metadata."""

    def __init__(self, chunker_type='semantic'):
        """Create the underlying chunker.

        Args:
            chunker_type: ``'semantic'`` for ``SemanticChunker`` or
                ``'recursive'`` for ``RecursiveChunker``.

        Raises:
            ValueError: If ``chunker_type`` is not one of the supported
                values. Failing here avoids a later ``AttributeError`` on the
                missing ``self.chunker``.
        """
        self.chunker_type = chunker_type
        if chunker_type == 'semantic':
            self.chunker = SemanticChunker()
        elif chunker_type == 'recursive':
            self.chunker = RecursiveChunker()
        else:
            raise ValueError(f"Unsupported chunker_type: {chunker_type!r}")

    def process_document(self, document, chunk_size=1024):
        """Chunk ``document`` and wrap every chunk in a metadata record.

        Args:
            document: Either the raw text as a string, or a dict with a
                required ``'text'`` key and an optional ``'source'`` key.
            chunk_size: Maximum chunk size handed to the chunker.

        Returns:
            A list of dicts with the keys ``'id'``, ``'text'``, ``'source'``
            and ``'chunk_index'``, in chunk order.

        Raises:
            KeyError: If ``document`` is a dict without a ``'text'`` key.
        """
        # The chunkers operate on strings while the source label lives on the
        # document record, so separate the two before chunking.
        if isinstance(document, dict):
            text = document['text']
            source = document.get('source', 'unknown')
        else:
            text = document
            source = 'unknown'

        # Apply the selected chunking strategy.
        if self.chunker_type == 'semantic':
            chunks = self.chunker.chunk_document(text, chunk_size)
        else:
            # chunk_recursive takes no size argument; the limit is an
            # attribute of the chunker, so apply the requested size there.
            self.chunker.max_chunk_size = chunk_size
            chunks = self.chunker.chunk_recursive(text)

        # Attach metadata to every chunk.
        return [
            {
                'id': f'chunk_{i}',
                'text': chunk,
                'source': source,
                'chunk_index': i,
            }
            for i, chunk in enumerate(chunks)
        ]
Enter fullscreen mode Exit fullscreen mode

3. 임베딩 모델 선택과 비교

3.1 임베딩 모델 비교

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class EmbeddingEvaluator:
    """Compare several sentence-embedding models on one sentence pair."""

    def __init__(self):
        """Instantiate every candidate model, keyed by its model name."""
        model_names = ('all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'sentence-t5-base')
        self.models = {name: SentenceTransformer(name) for name in model_names}

    def evaluate_models(self, test_sentences):
        """Score each model by the similarity it assigns to a sentence pair.

        Args:
            test_sentences: Sequence of sentences. Only the first two are
                compared; fewer than two raises ``IndexError``.

        Returns:
            A dict mapping each model name to the cosine similarity between
            the embeddings of the first and second sentence.
        """
        def first_pair_score(model):
            # Simple similarity check between the first two embeddings.
            vectors = model.encode(test_sentences)
            pairwise = cosine_similarity([vectors[0]], [vectors[1]])
            return pairwise[0][0]

        return {name: first_pair_score(model) for name, model in self.models.items()}

# Usage example: score one pair of paraphrased sentences with every model.
# NOTE(review): constructing EmbeddingEvaluator instantiates all three models
# up front, so this runs as a side effect wherever the snippet is executed.
evaluator = EmbeddingEvaluator()
test_sentences = [
    "The quick brown fox jumps over the lazy dog",
    "A fast brown fox leaps over a sleepy dog"
]
# Maps each model name to the cosine similarity of the two sentences above.
similarities = evaluator.evaluate_models(test_sentences)
Enter fullscreen mode Exit fullscreen mode

4. 벡터 데이터베이스 비교: Chroma, Qdrant, pgvector, Milvus

4.1 Chroma 벡터 데이터베이스

import chromadb
from chromadb import Client
from chromadb.config import Settings

class ChromaVectorDB:
    """Thin wrapper around one Chroma collection configured for cosine distance."""

    def __init__(self, collection_name="rag_collection"):
        """Open (or create) the collection on an in-process Chroma client.

        Args:
            collection_name: Name of the collection to get or create.
        """
        # Use the default client configuration. Passing the legacy
        # ``chroma_db_impl`` setting is rejected by Chroma 0.4 and later,
        # while the default client is an in-memory instance on both the old
        # and the new API.
        self.client = Client()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )

    def add_documents(self, documents, ids):
        """Add documents to the collection.

        Args:
            documents: List of document strings.
            ids: List of unique ids, one per document.

        Raises:
            ValueError: If ``documents`` and ``ids`` differ in length.
        """
        if len(documents) != len(ids):
            raise ValueError(
                f"documents and ids must have the same length "
                f"(got {len(documents)} and {len(ids)})"
            )
        if not documents:
            # Nothing to store; avoid calling the backend with empty lists.
            return

        self.collection.add(
            documents=documents,
            ids=ids
        )

    def search(self, query, limit=5):
        """Query the collection with a text query.

        Args:
            query: Query text.
            limit: Maximum number of results to request.

        Returns:
            The result object returned by ``collection.query``.
        """
        return self.collection.query(
            query_texts=[query],
            n_results=limit
        )
Enter fullscreen mode Exit fullscreen mode

4.2 Qdrant 벡터 데이터베이스

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Thin wrapper around one Qdrant collection using cosine distance."""

    def __init__(self, host="localhost", port=6333, collection_name="rag_collection"):
        """Connect to a Qdrant server.

        Args:
            host: Qdrant host name.
            port: Qdrant port.
            collection_name: Collection used by every method of this wrapper.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

    def create_collection(self, vector_size=384):
        """(Re)create the collection.

        Args:
            vector_size: Dimensionality of the stored vectors. The default of
                384 is the value the original snippet hard-coded.
        """
        from qdrant_client.models import Distance, VectorParams

        # NOTE(review): recreate_collection is called as in the original;
        # confirm that replacing an existing collection is intended here.
        self.client.recreate_collection(
            collection_name=self.collection_name,
            # Use the typed model: a plain dict is the shape the client uses
            # for a mapping of *named* vectors, so it is ambiguous here.
            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
        )

    def add_documents(self, documents, ids, metadata=None):
        """Upsert documents with precomputed embeddings.

        Args:
            documents: List of dicts with ``'embedding'`` and ``'text'`` keys
                and an optional ``'source'`` key.
            ids: List of point ids, one per document.
            metadata: Optional list of dicts, one per document, merged into
                the stored payload.

        Raises:
            ValueError: If ``ids`` or a non-empty ``metadata`` does not have
                one entry per document.
        """
        from qdrant_client.models import PointStruct

        if len(documents) != len(ids):
            raise ValueError(
                f"documents and ids must have the same length "
                f"(got {len(documents)} and {len(ids)})"
            )
        if metadata and len(metadata) != len(documents):
            raise ValueError(
                f"metadata must have one entry per document "
                f"(got {len(metadata)} for {len(documents)} documents)"
            )
        if not documents:
            return

        points = []
        for i, (point_id, doc) in enumerate(zip(ids, documents)):
            vector = doc["embedding"]
            # Accept array-like embeddings by converting them to plain lists.
            if hasattr(vector, "tolist"):
                vector = vector.tolist()
            points.append(
                PointStruct(
                    id=point_id,
                    vector=vector,
                    payload={
                        "text": doc["text"],
                        "source": doc.get("source", ""),
                        **(metadata[i] if metadata else {}),
                    },
                )
            )

        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

    def search(self, query_vector, limit=5):
        """Return the nearest stored points for ``query_vector``.

        Args:
            query_vector: Query embedding.
            limit: Maximum number of results to return.

        Returns:
            The result returned by the client's ``search`` call.
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=limit
        )
Enter fullscreen mode Exit fullscreen mode

5. 전체 RAG 파이프라인 코드


python
import numpy as np
from sentence_transformers import SentenceTransformer
from chromadb import Client
from chromadb.config import Settings

class RAGPipeline:
    def __init__(self):
        """Set up the embedding model and the Chroma collection."""
        # Initialize the embedding model.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        # Use the default client configuration. Passing the legacy
        # ``chroma_db_impl`` setting is rejected by Chroma 0.4 and later,
        # while the default client is an in-memory instance on both the old
        # and the new API.
        self.client = Client()
        self.collection = self.client.get_or_create_collection(
            name="rag_collection",
            metadata={"hnsw:space": "cosine"}
        )

    def add_document(self, text, doc_id, metadata=None):
        """Chunk, embed and store one document.

        Args:
            text: Document text.
            doc_id: Document identifier. It is stored as each chunk's
                ``source`` and used as the prefix of the chunk ids
                (``"<doc_id>_<index>"``).
            metadata: Optional dict merged into every chunk's metadata.
                Its keys take precedence over ``source`` and ``chunk_index``.
        """
        # Chunking. Blank chunks carry no content, so drop them before
        # spending an embedding on them.
        chunker = RecursiveChunker()
        chunks = [chunk for chunk in chunker.chunk_recursive(text) if chunk.strip()]
        if not chunks:
            # Empty or whitespace-only document: nothing to embed or store.
            return

        # Create the embeddings.
        embeddings = self.embedding_model.encode(chunks)

        # Store in the vector DB.
        extra = metadata or {}
        self.collection.add(
            documents=chunks,
            embeddings=embeddings.tolist(),
            ids=[f"{doc_id}_{i}" for i, _ in enumerate(chunks)],
            metadatas=[
                {"source": doc_id, "chunk_index": i, **extra}
                for i, _ in enumerate(chunks)
            ],
        )

    def retrieve(self, query, top_k=3):


---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)