matias yoon

Posted on May 25

RAG 시스템 실전 구축 (v18)

#ai #llm #developers #tutorial

RAG 시스템 실전 구축 (v18)

1. RAG의 핵심 개념

RAG(Retrieval-Augmented Generation)는 외부 문서에서 검색한 정보를 프롬프트에 추가하여 대규모 언어 모델(LLM) 답변의 정확도를 높이는 아키텍처입니다. 다음과 같은 세 가지 단계로 작동합니다:

Retrieval: 사용자 질문과 관련된 문서 조각을 검색합니다.
Augmentation: 검색된 문서 조각을 사용자 질문과 결합하여 LLM에 전달할 컨텍스트(프롬프트)를 구성합니다.
Generation: 증강된 컨텍스트를 기반으로 답변을 생성합니다.

이 루프는 다음과 같은 순서로 작동합니다:

# Basic structure of the RAG loop
def rag_loop(query, vector_db, embedding_model, llm):
    """Run one retrieve -> augment -> generate pass for ``query``.

    Args:
        query: The user question; forwarded to the search, augmentation
            and generation calls.
        vector_db: Object exposing ``search(query, top_k=...)``.
        embedding_model: Accepted for the caller's convenience; this
            function does not reference it.
        llm: Object exposing ``generate(context, query)``.

    Returns:
        Whatever ``llm.generate`` returns.
    """
    # Step 1 - retrieval: request up to five results from the store.
    hits = vector_db.search(query, top_k=5)

    # Step 2 - augmentation: ``augment_context`` must be provided elsewhere.
    context = augment_context(query, hits)

    # Step 3 - generation: hand the augmented context and question to the LLM.
    return llm.generate(context, query)

2. Chunking 전략

2.1 Semantic Chunking

의미 기반으로 문서를 분할하여 의미 단위를 유지합니다:

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.cluster import KMeans

class SemanticChunker:
    """Merge neighbouring sentences into chunks based on embedding clusters.

    Sentences are embedded with a ``SentenceTransformer`` model, clustered
    with ``KMeans``, and consecutive sentences that received the same cluster
    label are joined into one chunk.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def split_sentences(self, text):
        """Return the sentences of ``text`` as a list of non-empty strings.

        A sentence boundary is any run of whitespace that directly follows
        ``.``, ``!`` or ``?``; the terminator stays attached to its sentence.
        """
        import re  # local import so the class does not rely on import order

        parts = re.split(r'(?<=[.!?])\s+', text.strip())
        return [part for part in parts if part]

    def chunk_by_semantic_distance(self, text, threshold=0.7):
        """Split ``text`` into chunks of consecutive same-cluster sentences.

        Args:
            text: The document text to chunk.
            threshold: NOTE(review): accepted for interface compatibility,
                but the KMeans grouping below does not consult it.

        Returns:
            A list of chunk strings. Texts with zero or one sentence are
            returned as-is (``[]`` or a one-element list) without running
            the model, because clustering needs at least one cluster.
        """
        sentences = self.split_sentences(text)
        if len(sentences) < 2:
            # len // 2 would be 0 here, which KMeans rejects.
            return sentences

        embeddings = self.model.encode(sentences)

        # Cluster on the embeddings; half as many clusters as sentences.
        kmeans = KMeans(n_clusters=len(sentences) // 2)
        labels = kmeans.fit_predict(embeddings)

        chunks = [sentences[0]]
        for sentence, label, previous in zip(
            sentences[1:], labels[1:], labels[:-1]
        ):
            if label != previous:
                # Cluster changed: start a new chunk.
                chunks.append(sentence)
            else:
                chunks[-1] += " " + sentence

        return chunks

# Usage example
# Constructing the chunker loads the default 'all-MiniLM-L6-v2' model.
chunker = SemanticChunker()
chunks = chunker.chunk_by_semantic_distance("문서 내용...", threshold=0.7)

2.2 Recursive Chunking

재귀적으로 작은 단위로 분할:

import re

class RecursiveChunker:
    """Chunk text by paragraph, then by sentence, then by fixed-size window.

    ``chunk_size`` is the maximum chunk length in characters. ``overlap`` is
    the number of characters shared by neighbouring windows when a single
    sentence is longer than ``chunk_size`` and has to be cut by size.
    """

    def __init__(self, chunk_size=512, overlap=50):
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk_recursive(self, text):
        """Return a list of chunks, each at most ``chunk_size`` characters.

        Paragraphs (separated by a blank line) that fit are kept whole.
        Longer paragraphs are split into sentences which are packed greedily;
        a sentence that alone exceeds ``chunk_size`` is cut into overlapping
        windows. Blank paragraphs produce no chunk.
        """
        chunks = []

        # Paragraph-level split
        for para in text.split('\n\n'):
            if not para.strip():
                continue  # an empty chunk carries nothing worth embedding
            if len(para) <= self.chunk_size:
                chunks.append(para)
            else:
                chunks.extend(self._pack_sentences(para))

        return chunks

    def _pack_sentences(self, para):
        """Greedily pack the sentences of ``para`` into size-limited chunks."""
        # Split after the terminator so the original punctuation is kept.
        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', para)]

        packed = []
        current = ""
        for sentence in sentences:
            if not sentence:
                continue
            candidate = f"{current} {sentence}" if current else sentence
            if len(candidate) <= self.chunk_size:
                current = candidate
                continue
            if current:
                packed.append(current)
                current = ""
            if len(sentence) <= self.chunk_size:
                current = sentence
            else:
                packed.extend(self._split_by_size(sentence))

        if current:
            packed.append(current)
        return packed

    def _split_by_size(self, text):
        """Cut ``text`` into ``chunk_size`` windows that share ``overlap`` chars."""
        size = self.chunk_size
        if size <= 0:
            return [text]  # no usable limit; leave the text intact
        step = size - self.overlap
        if step <= 0:
            step = size  # overlap >= size would never advance

        pieces = []
        start = 0
        while True:
            pieces.append(text[start:start + size])
            if start + size >= len(text):
                break
            start += step
        return pieces

2.3 Agentic Chunking

에이전트(LLM)가 문맥을 분석해 의미 있는 단위를 스스로 결정하는 방식입니다. 아래 코드는 이를 단순화하여, 단어를 길이 제한에 맞춰 묶는 예시입니다:

class AgenticChunker:
    """Pack whitespace-separated words into chunks of limited length.

    Despite the name, the visible implementation does no entity detection;
    it is a simplified example that only groups words by character count.
    """

    def __init__(self, chunk_size=512):
        self.chunk_size = chunk_size

    def chunk_with_entity_detection(self, text):
        """Return chunks of space-joined words, each at most ``chunk_size`` long.

        A single word longer than ``chunk_size`` is kept intact as its own
        chunk. Empty or whitespace-only ``text`` yields ``[]``.
        """
        chunks = []
        current_words = []
        current_length = 0  # always equals len(' '.join(current_words))

        for word in text.split():
            if current_words:
                needed = current_length + 1 + len(word)  # +1 for the separator
            else:
                needed = len(word)

            # Only flush when there is something to flush, so an oversized
            # first word never produces an empty chunk.
            if current_words and needed > self.chunk_size:
                chunks.append(' '.join(current_words))
                current_words = [word]
                current_length = len(word)
            else:
                current_words.append(word)
                current_length = needed

        if current_words:
            chunks.append(' '.join(current_words))

        return chunks

3. 임베딩 모델 선택

3.1 모델 비교

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

class EmbeddingBenchmark:
    """Encode the same texts with several embedding models and time each one."""

    def __init__(self):
        """Load every model to be compared, keyed by model name.

        All models are instantiated eagerly here.
        NOTE(review): confirm that 'sentence-t5-3b' is a published model id.
        """
        self.models = {
            'all-MiniLM-L6-v2': SentenceTransformer('all-MiniLM-L6-v2'),
            'all-mpnet-base-v2': SentenceTransformer('all-mpnet-base-v2'),
            'sentence-t5-3b': SentenceTransformer('sentence-t5-3b')
        }

    def benchmark_model(self, texts, model_name):
        """Return the embeddings of ``texts`` from the model ``model_name``.

        Raises:
            KeyError: If ``model_name`` is not one of the loaded models.
        """
        return self.models[model_name].encode(texts)

    def compare_models(self, test_texts):
        """Encode ``test_texts`` with every model and collect basic statistics.

        Returns:
            A dict mapping model name to a dict with ``'time'`` (elapsed
            seconds for the encode call), ``'size'`` (number of embeddings)
            and ``'dimensions'`` (length of the first embedding, or 0 when
            no embeddings were produced).
        """
        import time  # local import: the surrounding snippet does not import it

        results = {}
        for name, model in self.models.items():
            # perf_counter is monotonic, so the interval cannot go negative.
            started = time.perf_counter()
            embeddings = model.encode(test_texts)
            elapsed = time.perf_counter() - started

            results[name] = {
                'time': elapsed,
                'size': len(embeddings),
                'dimensions': len(embeddings[0]) if len(embeddings) else 0,
            }
        return results

# Performance comparison
# Constructing the benchmark loads every model listed in EmbeddingBenchmark.
benchmark = EmbeddingBenchmark()
test_data = ["테스트 문장 1", "테스트 문장 2", "테스트 문장 3"]
results = benchmark.compare_models(test_data)
print(results)

3.2 최적 모델 선택 가이드

all-MiniLM-L6-v2: 속도와 정확성의 균형
all-mpnet-base-v2: 더 높은 정확성
sentence-t5-3b: 대규모 문서 처리

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

import chromadb
from chromadb.config import Settings

class ChromaDBManager:
    """Thin wrapper around one persistent Chroma collection named "docs"."""

    def __init__(self, persist_directory="./chroma_db"):
        """Open the on-disk client at ``persist_directory`` and its collection."""
        client = chromadb.PersistentClient(path=persist_directory)
        self.client = client
        self.collection = client.get_or_create_collection("docs")

    def add_documents(self, documents, embeddings, ids):
        """Add ``documents`` with their ``embeddings`` and ``ids`` to the collection."""
        payload = {
            "documents": documents,
            "embeddings": embeddings,
            "ids": ids,
        }
        self.collection.add(**payload)

    def search(self, query, top_k=5):
        """Query the collection with the text ``query``.

        Returns:
            A ``(documents, distances)`` tuple holding the first entry of the
            ``'documents'`` and ``'distances'`` fields of the query result.
        """
        hits = self.collection.query(query_texts=[query], n_results=top_k)
        matched_documents = hits['documents'][0]
        matched_distances = hits['distances'][0]
        return matched_documents, matched_distances

4.2 Qdrant 비교

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue

class QdrantManager:
    """Thin wrapper around a Qdrant client bound to the "rag_docs" collection."""

    def __init__(self, host="localhost", port=6333):
        """Connect to the Qdrant server at ``host``:``port``."""
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = "rag_docs"

    def create_collection(self, vector_size=384):
        """(Re)create the collection for ``vector_size``-dimensional vectors.

        Uses ``recreate_collection`` with the "Cosine" distance setting.
        """
        vector_config = VectorParams(size=vector_size, distance="Cosine")
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=vector_config,
        )

    def search(self, query_vector, top_k=5):
        """Return the client's search result for ``query_vector``.

        At most ``top_k`` hits are requested via the ``limit`` argument.
        """
        search_args = {
            "collection_name": self.collection_name,
            "query_vector": query_vector,
            "limit": top_k,
        }
        return self.client.search(**search_args)

4.3 pgvector 비교

import psycopg2
from psycopg2.extras import Json
import numpy as np

class PGVectorManager:
    """Store and search document embeddings in a PostgreSQL ``documents`` table."""

    def __init__(self, connection_string):
        """Open a connection and a cursor using ``connection_string``."""
        self.conn = psycopg2.connect(connection_string)
        self.cursor = self.conn.cursor()

    def create_table(self):
        """Create the ``documents`` table if it does not exist, then commit.

        The ``VECTOR(384)`` column type comes from the pgvector extension;
        presumably the extension is already installed in the database.
        """
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id UUID PRIMARY KEY,
                content TEXT,
                embedding VECTOR(384)
            )
        """)
        self.conn.commit()

    def search(self, query_vector, top_k=5):
        """Return up to ``top_k`` rows ordered by ``embedding <-> query_vector``.

        Args:
            query_vector: Iterable of numbers (list, tuple, NumPy array, ...).
            top_k: Maximum number of rows to return.

        Returns:
            A list of ``(content, embedding)`` rows from ``fetchall()``.
        """
        # psycopg2 adapts a Python list to a SQL array, which the pgvector
        # operator does not accept. Send the text form '[x,y,...]' and cast
        # it explicitly instead.
        vector_literal = "[" + ",".join(str(float(v)) for v in query_vector) + "]"
        self.cursor.execute("""
            SELECT content, embedding
            FROM documents
            ORDER BY embedding <-> %s::vector
            LIMIT %s
        """, (vector_literal, top_k))
        return self.cursor.fetchall()

    def close(self):
        """Close the cursor and the connection opened in ``__init__``."""
        self.cursor.close()
        self.conn.close()

5. 전체 RAG 파이프라인 구현


python
import numpy as np
from sentence_transformers import SentenceTransformer
from chromadb import PersistentClient
import time

class CompleteRAGPipeline:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model and open the "docs" collection in ./rag_db."""
        encoder = SentenceTransformer(model_name)
        client = PersistentClient(path="./rag_db")
        self.embedding_model = encoder
        self.vector_db = client
        self.collection = client.get_or_create_collection("docs")

    def prepare_documents(self, documents):
        """Chunk every document and assign each chunk an id.

        Each document is split with ``RecursiveChunker(chunk_size=512)``.
        Ids have the form ``"<document index>-<chunk index>"``.

        Returns:
            A ``(chunks, ids)`` tuple of two parallel lists.
        """
        splitter = RecursiveChunker(chunk_size=512)
        chunks, ids = [], []

        for doc_index, document in enumerate(documents):
            pieces = splitter.chunk_recursive(document)
            for piece_index, piece in enumerate(pieces):
                chunks.append(piece)
                ids.append(f"{doc_index}-{piece_index}")

        return chunks, ids

    def embed_and_store(self, documents):
        """문서 임베딩 및

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)

DEV Community

RAG 시스템 실전 구축 (v18)

RAG 시스템 실전 구축 (v18)

1. RAG의 핵심 개념

2. Chunking 전략

2.1 Semantic Chunking

2.2 Recursive Chunking

2.3 Agentic Chunking

3. 임베딩 모델 선택

3.1 모델 비교

3.2 최적 모델 선택 가이드

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

4.2 Qdrant 비교

4.3 pgvector 비교

5. 전체 RAG 파이프라인 구현

Top comments (0)