DEV Community

lijesom9-create
lijesom9-create

Posted on

Agent Memory System: Giving AI Long-Term Memory

Agent记忆系统:让AI拥有长期记忆

人类有短期记忆和长期记忆,AI也需要。本文基于education-agent的记忆系统实现,结合MemGPT和Generative Agents论文,探讨如何为AI Agent构建记忆系统。

前言

你和ChatGPT聊天时,有没有这种感觉:

"我上次不是告诉过你吗?"
"我之前说的那个偏好你忘了?"

AI的"失忆"问题,源于它的记忆机制:对话窗口就是它的全部记忆。

窗口一关闭,记忆就消失。

记忆类型

人类记忆的三种类型

类型 持续时间 容量 例子
感觉记忆 毫秒级 — 看到的图像
短期记忆 秒-分钟 7±2项 刚记住的电话号码
长期记忆 永久 无限 童年经历

AI记忆的对应

人类记忆 AI对应 实现方式
感觉记忆 当前输入 用户当前消息
短期记忆 对话历史 messages数组
长期记忆 外部存储 数据库、文件

Education-Agent的记忆系统

架构设计

用户输入
    │
    ▼
┌─────────────────────────────────────┐
│           记忆管理器                │
│  ┌─────────────┐  ┌─────────────┐  │
│  │ 短期记忆    │  │ 长期记忆    │  │
│  │ (对话历史)  │  │ (用户画像)  │  │
│  └─────────────┘  └─────────────┘  │
│  ┌─────────────┐  ┌─────────────┐  │
│  │ 档案记忆    │  │ 工作记忆    │  │
│  │ (历史记录)  │  │ (当前任务)  │  │
│  └─────────────┘  └─────────────┘  │
└─────────────────────────────────────┘
    │
    ▼
检索相关记忆 → 注入上下文 → LLM生成
Enter fullscreen mode Exit fullscreen mode

记忆类型定义

# memory/types.py
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum

class MemoryType(Enum):
    """The four kinds of memory the agent distinguishes between."""

    # Conversation history.
    SHORT_TERM = "short_term"

    # User profile.
    LONG_TERM = "long_term"

    # Historical records.
    ARCHIVAL = "archival"

    # Context of the task currently being worked on.
    WORKING = "working"

@dataclass
class Memory:
    """A single stored memory item together with its bookkeeping fields."""

    # --- Required: identity and payload ---
    id: str
    content: str
    memory_type: MemoryType

    # Importance score; the intended range is 0-1 (not enforced here).
    importance: float = 0.5

    # Both timestamps default to the naive local time at construction.
    created_at: datetime = field(default_factory=datetime.now)
    last_accessed: datetime = field(default_factory=datetime.now)

    # Access counter, starting from zero.
    access_count: int = 0

    # Free-form extra data; every instance gets its own dict.
    metadata: dict = field(default_factory=dict)

@dataclass
class UserProfile:
    """User profile: what is known about the person the agent talks to."""

    # User preferences (fresh dict per instance).
    preferences: dict = field(default_factory=dict)

    # Expertise level.
    expertise_level: str = "intermediate"

    # Preferred communication style.
    communication_style: str = "balanced"

    # Topics of interest (fresh list per instance).
    interests: list = field(default_factory=list)

    # Summary of past history.
    history_summary: str = ""
Enter fullscreen mode Exit fullscreen mode

记忆管理器

# memory/manager.py
class MemoryManager:
    """Facade that routes memories to the short-term, long-term and archival stores.

    ``remember`` writes a new memory to the store matching its type,
    ``recall`` queries the requested stores and merges the hits, and
    ``get_context`` renders the profile plus recalled memories as prompt text.
    A ``WorkingMemory`` instance is created as ``self.working`` but is not
    written to or searched by any method of this class.
    """

    def __init__(self, db, llm):
        """Create the manager and its backing stores.

        Args:
            db: Database handle passed to the long-term and archival stores.
            llm: Language-model client, kept as ``self.llm`` and shared with
                the stores that need one.
        """
        self.db = db
        self.llm = llm
        self.short_term = ShortTermMemory(max_tokens=4000)
        self.long_term = LongTermMemory(db)
        self.archival = ArchivalMemory(db)
        self.working = WorkingMemory()

        # ShortTermMemory._summarize and LongTermMemory._update_profile call
        # ``self.llm.ainvoke``; share our model with them unless they already
        # carry one, otherwise those calls fail with AttributeError.
        for store in (self.short_term, self.long_term):
            if getattr(store, "llm", None) is None:
                store.llm = llm

    def _store_for(self, memory_type):
        """Return the store backing ``memory_type``, or None if it has none."""
        if memory_type == MemoryType.SHORT_TERM:
            return self.short_term
        if memory_type == MemoryType.LONG_TERM:
            return self.long_term
        if memory_type == MemoryType.ARCHIVAL:
            return self.archival
        return None

    async def remember(self, content: str, memory_type: MemoryType, 
                       importance: float = 0.5, metadata: dict = None):
        """Store a new memory in the store that matches ``memory_type``.

        Args:
            content: Text of the memory.
            memory_type: Target store. Types without a backing store here
                (e.g. ``MemoryType.WORKING``) are built but not persisted.
            importance: Importance score attached to the memory.
            metadata: Optional extra data; an empty dict is used when omitted.
        """
        # Local import keeps the block self-contained when the module header
        # does not import uuid.
        import uuid

        memory = Memory(
            id=str(uuid.uuid4()),
            content=content,
            memory_type=memory_type,
            importance=importance,
            metadata=metadata or {}
        )

        store = self._store_for(memory_type)
        if store is not None:
            await store.add(memory)

    async def recall(self, query: str, memory_types: list[MemoryType] = None,
                     top_k: int = 5) -> list[Memory]:
        """Retrieve up to ``top_k`` memories for ``query``.

        Args:
            query: Search text forwarded to each store.
            memory_types: Stores to consult; defaults to short-term,
                long-term and archival. Types without a store are skipped.
            top_k: Per-store and overall result limit. Non-positive values
                yield an empty list.

        Returns:
            The merged hits ordered by descending importance.
        """
        # A negative top_k would slice from the wrong end below.
        if top_k <= 0:
            return []

        if memory_types is None:
            memory_types = [MemoryType.SHORT_TERM, MemoryType.LONG_TERM, MemoryType.ARCHIVAL]

        candidates = []
        for mt in memory_types:
            store = self._store_for(mt)
            if store is None:
                continue
            candidates.extend(await store.search(query, top_k))

        # Ranking uses importance only. The sort is stable, so memories with
        # equal importance keep the order their store returned them in.
        candidates.sort(key=lambda m: m.importance, reverse=True)
        return candidates[:top_k]

    async def get_context(self, query: str) -> str:
        """Build the context text to inject into the prompt.

        Args:
            query: Text used to recall related memories.

        Returns:
            A string with a user-profile section followed by one bullet per
            recalled memory.
        """
        # 1. Related memories.
        memories = await self.recall(query)

        # 2. User profile.
        profile = await self.long_term.get_profile()

        # 3. Assemble: fixed header, then one bullet line per memory.
        header = f"""
## 用户画像
- 专业水平: {profile.expertise_level}
- 沟通风格: {profile.communication_style}
- 兴趣: {', '.join(profile.interests)}

## 相关记忆
"""
        return header + "".join(f"- {mem.content}\n" for mem in memories)
Enter fullscreen mode Exit fullscreen mode

短期记忆(对话历史)

# memory/short_term.py
class ShortTermMemory:
    """In-process conversation history kept under a token budget.

    Messages are appended in order. When the estimated token count exceeds
    ``max_tokens``, everything but the latest messages is replaced by a
    single model-written summary entry.
    """

    def __init__(self, max_tokens: int = 4000, llm=None):
        """Create an empty history.

        Args:
            max_tokens: Budget above which ``add`` triggers compression.
            llm: Model client exposing ``ainvoke``; used only for summaries.
                It may also be assigned to ``self.llm`` after construction.
        """
        self.messages = []
        self.max_tokens = max_tokens
        self.llm = llm

    async def add(self, memory: Memory):
        """Append ``memory`` to the history, compressing if over budget."""
        self.messages.append(memory)

        # Compress once the budget is exceeded.
        if self._count_tokens() > self.max_tokens:
            await self._compress()

    async def search(self, query: str, top_k: int = 5) -> list[Memory]:
        """Return the ``top_k`` most recent messages.

        Simple implementation: ``query`` is not used for matching.
        """
        # ``messages[-0:]`` is the whole list, so zero needs its own branch.
        if top_k <= 0:
            return []
        return self.messages[-top_k:]

    def _count_tokens(self) -> int:
        """Estimate the token count of the stored messages.

        Rough heuristic: one token per character of content.
        NOTE(review): swap in the tokenizer of the target model if exact
        budgeting matters.
        """
        return sum(len(m.content) for m in self.messages)

    async def _compress(self):
        """Replace all but the 10 most recent messages with one summary."""
        # Local import keeps the block self-contained when the module header
        # does not import uuid.
        import uuid

        recent = self.messages[-10:]
        old = self.messages[:-10]

        # With 10 or fewer messages there is nothing to fold away.
        if not old:
            return

        summary = await self._summarize(old)
        summary_memory = Memory(
            # Memory.id is a required field; the summary gets a fresh one.
            id=str(uuid.uuid4()),
            content=f"之前的对话摘要: {summary}",
            memory_type=MemoryType.SHORT_TERM,
        )
        self.messages = [summary_memory] + recent

    async def _summarize(self, messages: list[Memory]) -> str:
        """Ask the model for a concise summary of ``messages``.

        Raises:
            RuntimeError: If no model has been configured.
        """
        if self.llm is None:
            raise RuntimeError("ShortTermMemory needs an llm to summarize history")

        text = "\n".join([m.content for m in messages])
        prompt = f"请简洁总结以下对话:\n{text}"
        reply = await self.llm.ainvoke(prompt)
        # Chat-style clients return a message object whose text is in
        # ``.content``; plain-text clients return the string itself.
        return getattr(reply, "content", reply)
Enter fullscreen mode Exit fullscreen mode

长期记忆(用户画像)

# memory/long_term.py
class LongTermMemory:
    """Database-backed long-term memory that also maintains the user profile.

    Memories are written to ``db.memories``. The profile itself is held in
    memory on this object and is rewritten by the model after every ``add``.
    """

    def __init__(self, db, llm=None):
        """Create the store with a default ``UserProfile``.

        Args:
            db: Database handle exposing a ``memories`` collection.
            llm: Model client exposing ``ainvoke``; used to update the
                profile. It may also be assigned to ``self.llm`` later.
        """
        self.db = db
        self.llm = llm
        self.profile = UserProfile()

    async def add(self, memory: Memory):
        """Persist ``memory`` and then refresh the user profile from it."""
        # Store in the database.
        await self.db.memories.insert_one({
            "id": memory.id,
            "content": memory.content,
            "importance": memory.importance,
            "created_at": memory.created_at,
            "metadata": memory.metadata
        })

        # Update the user profile.
        await self._update_profile(memory)

    async def search(self, query: str, top_k: int = 5) -> list[Memory]:
        """Search long-term memories with a vector-search aggregation.

        Args:
            query: Search text.
            top_k: Maximum number of results; non-positive yields ``[]``.
        """
        if top_k <= 0:
            return []

        # NOTE(review): ``add`` stores no "embedding" field, and this stage
        # passes raw text under "query". Atlas ``$vectorSearch`` expects a
        # numeric "queryVector" plus an "index" name. This store has no
        # embedding function, so the stage is left as written; confirm how
        # the query is meant to be vectorised.
        cursor = self.db.memories.aggregate([
            {
                "$vectorSearch": {
                    "query": query,
                    "path": "embedding",
                    "numCandidates": top_k * 10,
                    "limit": top_k
                }
            }
        ])
        # An explicit length keeps drivers that require the argument working.
        results = await cursor.to_list(length=top_k)

        return [self._to_memory(doc) for doc in results]

    @staticmethod
    def _to_memory(doc) -> Memory:
        """Build a ``Memory`` from a stored document.

        Documents carry driver fields such as ``_id`` and lack
        ``memory_type``, so ``Memory(**doc)`` would raise TypeError.
        """
        values = {
            "id": doc["id"],
            "content": doc["content"],
            "memory_type": MemoryType.LONG_TERM,
        }
        # Optional fields fall back to the dataclass defaults when absent.
        for key in ("importance", "created_at", "metadata"):
            if key in doc:
                values[key] = doc[key]
        return Memory(**values)

    async def _update_profile(self, memory: Memory):
        """Ask the model to fold ``memory`` into the current profile.

        Raises:
            RuntimeError: If no model has been configured.
        """
        if self.llm is None:
            raise RuntimeError("LongTermMemory needs an llm to update the user profile")

        prompt = f"""根据以下信息更新用户画像:

当前画像: {self.profile}
新信息: {memory.content}

请更新用户的偏好、兴趣、专业水平等信息。"""

        updated = await self.llm.ainvoke(prompt)
        self.profile = parse_profile(updated)

    async def get_profile(self) -> UserProfile:
        """Return the current in-memory user profile."""
        return self.profile
Enter fullscreen mode Exit fullscreen mode

档案记忆

# memory/archival.py
class ArchivalMemory:
    """Database-backed archive searched by embedding similarity.

    Documents live in ``db.archival``. Both ``add`` and ``search`` rely on
    ``self._embed``, which is not defined in this class as shown.
    """

    def __init__(self, db, index_name: str = "vector_index"):
        """Create the archive.

        Args:
            db: Database handle exposing an ``archival`` collection.
            index_name: Name of the vector-search index on that collection.
                NOTE(review): the default is a guess; set it to the real
                index name of the deployment.
        """
        self.db = db
        self.index_name = index_name

    async def add(self, memory: Memory):
        """Embed ``memory.content`` and store it with its embedding."""
        # Vectorise the content.
        embedding = await self._embed(memory.content)

        await self.db.archival.insert_one({
            "id": memory.id,
            "content": memory.content,
            "embedding": embedding,
            "importance": memory.importance,
            "created_at": memory.created_at
        })

    async def search(self, query: str, top_k: int = 5) -> list[Memory]:
        """Return up to ``top_k`` archived memories closest to ``query``.

        Args:
            query: Search text; it is embedded before searching.
            top_k: Maximum number of results; non-positive yields ``[]``.
        """
        if top_k <= 0:
            return []

        query_embedding = await self._embed(query)

        cursor = self.db.archival.aggregate([
            {
                "$vectorSearch": {
                    # The stage takes the query embedding as "queryVector"
                    # and requires the index to be named.
                    "index": self.index_name,
                    "queryVector": query_embedding,
                    "path": "embedding",
                    "numCandidates": top_k * 10,
                    "limit": top_k
                }
            }
        ])
        # An explicit length keeps drivers that require the argument working.
        results = await cursor.to_list(length=top_k)

        return [self._to_memory(doc) for doc in results]

    @staticmethod
    def _to_memory(doc) -> Memory:
        """Build a ``Memory`` from a stored document.

        Documents carry ``_id`` and ``embedding`` and lack ``memory_type``,
        so ``Memory(**doc)`` would raise TypeError.
        """
        values = {
            "id": doc["id"],
            "content": doc["content"],
            "memory_type": MemoryType.ARCHIVAL,
        }
        # Optional fields fall back to the dataclass defaults when absent.
        for key in ("importance", "created_at"):
            if key in doc:
                values[key] = doc[key]
        return Memory(**values)
Enter fullscreen mode Exit fullscreen mode

MemGPT的记忆架构

MemGPT提出了更复杂的记忆管理策略:

# memgpt/agent.py
class MemGPTAgent:
    """Sketch of a MemGPT-style agent with three memory tiers.

    ``step`` relies on ``_is_memory_full``, ``_search_memories`` and
    ``_build_context``, which are not defined in this class as shown.
    """

    def __init__(self, llm=None):
        """Create an agent with empty memories.

        Args:
            llm: Model client exposing ``ainvoke``. Optional for backward
                compatibility, but ``step`` and ``_flush_to_archival`` need it.
        """
        self.llm = llm
        self.main_memory = []      # main memory (like human working memory)
        self.archival_memory = []  # archival memory (like human long-term memory)
        self.recall_memory = []    # recall memory (conversation history)

    async def step(self, user_input: str) -> str:
        """Run one turn: flush if full, retrieve, build context, generate, log.

        Args:
            user_input: The user's message for this turn.

        Returns:
            Whatever ``self.llm.ainvoke`` returns for the built context.
        """
        # 1. Flush main memory when it is full.
        if self._is_memory_full():
            await self._flush_to_archival()

        # 2. Retrieve related memories.
        relevant_memories = await self._search_memories(user_input)

        # 3. Build the context to send to the model.
        context = self._build_context(user_input, relevant_memories)

        # 4. Generate the response.
        response = await self.llm.ainvoke(context)

        # 5. Record the exchange in recall memory.
        self.recall_memory.append({
            "user": user_input,
            "assistant": response
        })

        return response

    async def _flush_to_archival(self):
        """Move the memories the model marks as important to the archive.

        The model is asked for the IDs worth keeping. Main memory is emptied
        afterwards, so entries that are not selected are discarded.
        """
        # Local import keeps the block self-contained when the module header
        # does not import re.
        import re

        prompt = f"""决定以下记忆中哪些值得长期保存:

{self.main_memory}

返回值得保存的记忆ID列表。"""

        reply = await self.llm.ainvoke(prompt)

        # The reply is free text (or a message object carrying it). Split it
        # into whole ID tokens so that id "1" does not match inside "10",
        # which a plain substring test on the reply would do.
        reply_text = str(getattr(reply, "content", reply))
        keep_ids = set(re.findall(r"[\w-]+", reply_text))

        self.archival_memory.extend(
            memory for memory in self.main_memory
            if str(memory["id"]) in keep_ids
        )

        self.main_memory = []
Enter fullscreen mode Exit fullscreen mode

Generative Agents的记忆架构

来自Stanford的"Generative Agents"论文提出了更精细的记忆系统:

# generative_agents/memory.py
class GenerativeAgentMemory:
    """Memory stream scored by relevance, recency and importance.

    Relies on ``self._embed`` and a module-level ``cosine_similarity``,
    neither of which is defined in this class as shown.
    """

    def __init__(self):
        """Start with an empty memory stream."""
        self.memory_stream = []  # chronological list of memory dicts

    async def add(self, observation: str, importance: float):
        """Append an observation to the memory stream.

        Args:
            observation: Text of the observation; it is embedded on insert.
            importance: Importance score stored with the memory.
        """
        memory = {
            "content": observation,
            "importance": importance,
            "created_at": datetime.now(),
            "last_accessed": datetime.now(),
            "access_count": 0,
            "embedding": await self._embed(observation)
        }
        self.memory_stream.append(memory)

    async def retrieve(self, query: str, top_k: int = 5) -> list:
        """Return the ``top_k`` best memories for ``query``.

        The score is ``0.5 * relevance + 0.3 * time decay + 0.2 * importance``.
        Returned memories get ``last_accessed`` refreshed and
        ``access_count`` incremented, so recency reflects actual use.

        Args:
            query: Search text; it is embedded before scoring.
            top_k: Maximum number of memories; non-positive yields ``[]``.
        """
        query_embedding = await self._embed(query)

        scored = []
        for memory in self.memory_stream:
            relevance = cosine_similarity(query_embedding, memory["embedding"])
            recency = self._time_decay(memory["last_accessed"])
            score = relevance * 0.5 + recency * 0.3 + memory["importance"] * 0.2
            scored.append((memory, score))

        # Highest score first; a negative top_k must not slice from the end.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        selected = [memory for memory, _ in scored[:max(top_k, 0)]]

        # Record the access. Without this ``last_accessed`` stays at creation
        # time forever and ``access_count`` never leaves zero.
        accessed_at = datetime.now()
        for memory in selected:
            memory["last_accessed"] = accessed_at
            memory["access_count"] += 1

        return selected

    def _time_decay(self, last_accessed: datetime) -> float:
        """Return a recency weight in (0, 1] that shrinks with elapsed hours.

        Timestamps in the future (clock skew) count as "just accessed";
        otherwise the formula leaves the (0, 1] range or divides by zero.
        """
        elapsed_hours = (datetime.now() - last_accessed).total_seconds() / 3600
        return 1 / (1 + max(elapsed_hours, 0.0))  # inverse-proportional decay
Enter fullscreen mode Exit fullscreen mode

实战:构建记忆增强对话系统

# memory_enhanced_chat.py
class MemoryEnhancedChat:
    """Chat loop that injects recalled memories and stores each exchange.

    ``chat`` relies on ``self._extract_important``, which is not defined in
    this class as shown.
    """

    def __init__(self, db=None, llm=None):
        """Wire the chat model and the memory manager together.

        Args:
            db: Database handle for ``MemoryManager``. Required in practice;
                the manager's constructor needs it.
            llm: Model client exposing ``ainvoke``. Defaults to
                ``ChatOpenAI(model="gpt-4")``.

        Raises:
            TypeError: If ``db`` is not supplied.
        """
        # MemoryManager.__init__ takes (db, llm); calling it with no
        # arguments raises TypeError, so fail here with a clearer message.
        if db is None:
            raise TypeError("MemoryEnhancedChat requires a db handle for MemoryManager")

        self.llm = llm if llm is not None else ChatOpenAI(model="gpt-4")
        self.memory_manager = MemoryManager(db, self.llm)

    async def chat(self, user_input: str) -> str:
        """Answer ``user_input`` using recalled memories, then store the turn.

        Args:
            user_input: The user's message.

        Returns:
            The text of the model's reply.
        """
        # 1. Retrieve related memories.
        context = await self.memory_manager.get_context(user_input)

        # 2. Build the prompt.
        prompt = f"""你是一个有记忆的AI助手。

{context}

用户输入:{user_input}

请基于你的记忆回答用户的问题。如果记忆中没有相关信息,请诚实说明。"""

        # 3. Generate the response. Chat-style clients return a message
        #    object whose text is in ``.content``; use the text so the stored
        #    memory and the return value are plain strings.
        reply = await self.llm.ainvoke(prompt)
        response = getattr(reply, "content", reply)

        # 4. Store this exchange in short-term memory.
        await self.memory_manager.remember(
            f"用户: {user_input}\n助手: {response}",
            MemoryType.SHORT_TERM
        )

        # 5. Extract important information into long-term memory.
        important_info = await self._extract_important(user_input, response)
        if important_info:
            await self.memory_manager.remember(
                important_info,
                MemoryType.LONG_TERM,
                importance=0.8
            )

        return response
Enter fullscreen mode Exit fullscreen mode

总结

记忆类型 作用 存储位置 检索方式
短期记忆 当前对话 内存 时间顺序
长期记忆 用户画像 数据库 向量检索
档案记忆 历史记录 数据库 向量检索
工作记忆 当前任务 内存 直接访问

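参考资料

- Packer et al., "MemGPT: Towards LLMs as Operating Systems", arXiv:2310.08560
- Park et al., "Generative Agents: Interactive Simulacra of Human Behavior", arXiv:2304.03442
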
记忆是AI个性化的基础。没有记忆的AI,每次对话都是第一次见面。

tags: memory, memgpt, personalization, rag, python
series: rag-knowledge-system

Top comments (0)