DEV Community

Cover image for Build a Self-Evolving Memory Agent in 150 Lines
Narnaiezzsshaa Truong
Narnaiezzsshaa Truong

Posted on

Build a Self-Evolving Memory Agent in 150 Lines

This is the runnable companion to the Memory Architecture series. No dependencies. Copy, paste, run.

The skeleton demonstrates:

  • Inner loop: runtime behavior (encode → store → retrieve → manage)
  • Outer loop: architecture evolution (adapt config based on performance)
  • Four rooms: encode, store, retrieve, manage as separate concerns
Save the skeleton below as self_evolving_agent.py, then run it:

    python self_evolving_agent.py

The Full Skeleton

"""
self_evolving_agent.py

A minimal, runnable skeleton of a self-evolving memory agent.
No external dependencies. Uses fake embeddings so you can see
the loop behavior end-to-end before swapping in real components.
"""

import json
import math
import random
from typing import List, Dict, Any, Tuple


# -----------------------------
# Utility: fake embedding + similarity
# -----------------------------

def fake_embed(text: str) -> List[float]:
    """Return a unit-length 26-dim letter-frequency vector for ``text``.

    Only the letters a-z (after lower-casing) are counted; every other
    character is ignored. Text without any such letter yields the all-zero
    vector. This is a stand-in for a real embedding model.
    """
    base = ord("a")
    histogram = [0.0] * 26
    for offset in (ord(ch) - base for ch in text.lower()):
        if 0 <= offset < 26:
            histogram[offset] += 1.0

    length = math.sqrt(sum(v * v for v in histogram))
    if not length:
        # Avoid dividing by zero when no letters were seen.
        length = 1.0
    return [v / length for v in histogram]


def cosine_sim(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of vectors ``a`` and ``b``.

    The dot product is divided by the product of the two Euclidean norms, so
    the result stays in [-1, 1] even when the inputs are not unit-length
    (e.g. after swapping ``fake_embed`` for a real embedding model).

    Both vectors are expected to have the same length. If either vector has
    zero norm (including the empty vector) the similarity is defined as 0.0.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    if norm_a == 0.0 or norm_b == 0.0:
        # A zero vector has no direction; treat it as dissimilar to everything.
        return 0.0
    return dot / (norm_a * norm_b)


# -----------------------------
# Memory architecture (The Four Rooms)
# -----------------------------

class MemoryItem:
    """One stored memory: the raw text, its embedding vector and a label."""

    def __init__(self, text: str, vector: List[float], label: str = ""):
        # All three fields are plain public attributes.
        self.text, self.vector, self.label = text, vector, label


class Memory:
    """In-memory store organised as four "rooms": encode, store, retrieve, manage.

    The config knobs (``top_k``, ``sim_threshold``, ``decay_prob``) are plain
    public attributes so the agent's outer loop can tune them at runtime.

    Args:
        top_k: Maximum number of items ``retrieve`` returns.
        sim_threshold: Minimum similarity for an item to be a candidate.
        decay_prob: Per-item probability of being dropped on each
            ``manage`` call; values <= 0 disable decay.
    """

    def __init__(
        self,
        top_k: int = 3,
        sim_threshold: float = 0.2,
        decay_prob: float = 0.0,
    ):
        # Config knobs — these are what the outer loop evolves
        self.top_k = top_k
        self.sim_threshold = sim_threshold
        self.decay_prob = decay_prob
        self.items: List[MemoryItem] = []

        # Stats for drift detection
        self.total_retrievals = 0
        self.successful_retrievals = 0

    # ---- ROOM 1: ENCODE ----
    def encode(self, text: str) -> List[float]:
        """Turn ``text`` into an embedding vector."""
        return fake_embed(text)

    # ---- ROOM 2: STORE ----
    def store(self, text: str, label: str = "") -> None:
        """Encode ``text`` and append it to memory under ``label``."""
        vec = self.encode(text)
        self.items.append(MemoryItem(text, vec, label))

    # ---- ROOM 3: RETRIEVE ----
    def retrieve(self, query: str) -> "List[MemoryItem]":
        """Return up to ``top_k`` items most similar to ``query``.

        Only items whose similarity is at least ``sim_threshold`` qualify.
        Results are ordered from most to least similar. A lookup against an
        empty memory returns ``[]`` and is not counted in the stats.
        """
        if not self.items:
            return []

        q_vec = self.encode(query)
        scored: List[Tuple[float, MemoryItem]] = []
        for item in self.items:
            sim = cosine_sim(q_vec, item.vector)
            if sim >= self.sim_threshold:
                scored.append((sim, item))

        scored.sort(key=lambda pair: pair[0], reverse=True)
        # Clamp so a negative top_k cannot turn the slice into
        # "everything except the last |top_k| items".
        limit = max(self.top_k, 0)
        results = [it for _, it in scored[:limit]]

        self.total_retrievals += 1
        if results:
            self.successful_retrievals += 1

        return results

    # ---- ROOM 4: MANAGE ----
    def manage(self) -> None:
        """Randomly forget items; each one survives when a draw exceeds ``decay_prob``."""
        if self.decay_prob <= 0.0:
            return
        self.items = [item for item in self.items if random.random() > self.decay_prob]

    # ---- DIAGNOSTICS ----
    def retrieval_success_rate(self) -> float:
        """Return the fraction of counted retrievals that returned something.

        Returns 1.0 when no retrieval has been counted yet.
        """
        if self.total_retrievals == 0:
            return 1.0
        return self.successful_retrievals / self.total_retrievals

    def size(self) -> int:
        """Return the number of stored items."""
        return len(self.items)

    def to_config(self) -> Dict[str, Any]:
        """Return a snapshot of the knobs and diagnostics, floats rounded to 3 places."""
        return {
            "top_k": self.top_k,
            "sim_threshold": round(self.sim_threshold, 3),
            "decay_prob": round(self.decay_prob, 3),
            "size": self.size(),
            "retrieval_success_rate": round(self.retrieval_success_rate(), 3),
        }


# -----------------------------
# Model stub
# -----------------------------

class DummyModel:
    """Placeholder LLM that just echoes the query and its retrieved context.

    Swap this for a real model call.
    """

    def run(self, query: str, context: List[MemoryItem]) -> str:
        """Format ``query`` and each context item's label and text as one string."""
        header = f"Q: {query}\n"
        rendered = [f"  [{entry.label}] {entry.text}" for entry in context]
        if not rendered:
            return header + "Context: (none)"
        return header + "Context:\n" + "\n".join(rendered)


# -----------------------------
# Agent: Inner Loop + Outer Loop
# -----------------------------

class Agent:
    """Agent with an inner runtime loop and an outer architecture-evolution loop.

    The inner loop (``handle_task``) uses memory to answer a query. The outer
    loop (``evolve_memory_architecture``) adjusts the memory's config knobs
    from the stats gathered since the previous evaluation.
    """

    def __init__(self, memory: Memory, model: DummyModel):
        self.memory = memory
        self.model = model
        # One record per handled task: query, label and whether retrieval
        # surfaced an item carrying the same label.
        self.history: List[Dict[str, Any]] = []

    # ---- INNER LOOP (runtime) ----
    def handle_task(self, query: str, label: str) -> str:
        """Answer ``query`` from memory, then remember it under ``label``.

        Retrieval runs before the query is stored. Storing first would make
        every query retrieve itself at maximum similarity, so ``success`` and
        the memory's retrieval success rate would always be 100% and the
        outer loop would have nothing meaningful to adapt to.

        Returns:
            The model output for the query and the retrieved context.
        """
        context = self.memory.retrieve(query)
        output = self.model.run(query, context)
        self.memory.store(query, label=label)
        self.memory.manage()

        success = any(item.label == label for item in context)
        self.history.append({"query": query, "label": label, "success": success})
        return output

    # ---- OUTER LOOP (architecture evolution) ----
    def evolve_memory_architecture(self) -> None:
        """Tune the memory knobs from the current evaluation window.

        Prints the config before and after, then resets the retrieval
        counters so the next window starts fresh.
        """
        success_rate = self.memory.retrieval_success_rate()
        size = self.memory.size()

        print("\n>>> OUTER LOOP: Evaluating memory architecture")
        print(f"    Before: {self.memory.to_config()}")

        # Adapt retrieval aggressiveness: loosen when retrieval often comes
        # back empty, tighten when it almost always succeeds.
        if success_rate < 0.6:
            self.memory.top_k = min(self.memory.top_k + 1, 10)
            self.memory.sim_threshold = max(self.memory.sim_threshold - 0.02, 0.05)
        elif success_rate > 0.9:
            self.memory.top_k = max(self.memory.top_k - 1, 1)
            self.memory.sim_threshold = min(self.memory.sim_threshold + 0.02, 0.8)

        # Adapt decay based on size: forget more when memory is large,
        # less when it is small.
        if size > 100:
            self.memory.decay_prob = min(self.memory.decay_prob + 0.05, 0.5)
        elif size < 30:
            self.memory.decay_prob = max(self.memory.decay_prob - 0.05, 0.0)

        print(f"    After:  {self.memory.to_config()}")

        # Reset stats for next evaluation window
        self.memory.total_retrievals = 0
        self.memory.successful_retrievals = 0

    def dump_history(self, path: str = "agent_history.jsonl") -> None:
        """Write the task history to ``path`` as JSON Lines, overwriting it."""
        # Explicit encoding keeps the output independent of the platform default.
        with open(path, "w", encoding="utf-8") as f:
            for record in self.history:
                f.write(json.dumps(record) + "\n")


# -----------------------------
# Demo
# -----------------------------

def main():
    """Run the demo: feed shuffled labelled queries to the agent.

    The outer loop is triggered after every fifth task, and the task history
    is written to disk at the end.
    """
    agent = Agent(Memory(), DummyModel())

    # Toy dataset: queries with category labels, repeated three times
    base_tasks = [
        ("How do I process a refund?", "refund"),
        ("Steps to issue a refund via card", "refund"),
        ("How to troubleshoot a login error?", "login"),
        ("User cannot sign in, what now?", "login"),
        ("How to update user email address?", "account"),
        ("Change account email for a customer", "account"),
    ]
    tasks = base_tasks * 3
    random.shuffle(tasks)

    step = 0
    for query, label in tasks:
        step += 1
        print(f"\n--- Task {step} ---")
        print(agent.handle_task(query, label))

        # Run outer loop every 5 tasks
        if step % 5 == 0:
            agent.evolve_memory_architecture()

    agent.dump_history()
    print("\n✓ Done. History written to agent_history.jsonl")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

What You'll See

When you run it:

  1. Tasks 1–5: Inner loop runs, memory fills, retrieval improves
  2. Outer loop fires: Config adjusts based on retrieval success rate
  3. Tasks 6–10: Behavior changes because architecture changed
  4. Repeat: The agent evolves its own memory strategy

The to_config() output shows you exactly what changed and why.


Extending It

| Component | Swap In |
| --- | --- |
| `fake_embed()` | OpenAI, Cohere, or local embedding model |
| `self.items` | Pinecone, Weaviate, Chroma, pgvector |
| `DummyModel` | Any LLM via API or local |
| `evolve_memory_architecture()` | Your own adaptation logic |

The architecture stays the same. The components scale.


The Series

  1. Why Memory Architecture Matters More Than Your Model—concepts
  2. How To Detect Memory Drift In Production Agents—metrics + alerting
  3. Build a Self-Evolving Memory Agent in 150 Lines—you are here

For the conceptual framework: The Two Loops on Substack.

Top comments (0)