Agentic Memory — Persistent Context Management
Long-running agents face the context window problem: you can't fit months of interactions in a single prompt. Effective agents maintain different tiers of memory: working memory (current context), episodic memory (past conversations, stored in vector DB), semantic memory (facts), and procedural memory (learned skills).
Hierarchical Agent Memory
import chromadb
from openai import OpenAI
import json
from datetime import datetime
# Module-level clients shared by AgentMemorySystem below.
client = OpenAI()  # OpenAI client -- used only for embedding calls here
chroma = chromadb.Client()  # in-process Chroma vector store (ephemeral by default)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# FOUR TYPES OF AGENT MEMORY
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Reference table: the four memory tiers a long-running agent maintains.
# (Documentation data only -- the class below implements the first three.)
memory_types = {
    "Working Memory": "Current context window -- what's actively in the LLM prompt. Limited by context window size.",
    "Episodic Memory": "Past conversations and experiences -- stored in vector DB, retrieved by similarity.",
    "Semantic Memory": "Facts and knowledge -- user preferences, domain facts, entity info. Key-value or graph.",
    "Procedural Memory": "Skills and workflows -- how to do tasks, successful tool call patterns. Retrieved by task type.",
}
class AgentMemorySystem:
'''Hierarchical memory for long-running AI agents.'''
def __init__(self, agent_id: str):
self.agent_id = agent_id
# Episodic: stores conversations, searchable by semantic similarity
self.episodic = chroma.create_collection(f"episodic_{agent_id}")
# Semantic: stores facts (key-value style)
self.semantic_facts = {} # in production: Redis or DynamoDB
# Working memory: recent conversation history (context window)
self.working_memory: list[dict] = []
self.max_working_memory = 10 # last 10 exchanges
def get_embedding(self, text: str) -> list[float]:
resp = client.embeddings.create(input=text, model="text-embedding-3-small")
return resp.data[0].embedding
def add_episodic(self, conversation: str, metadata: dict = None) -> None:
'''Store a conversation in episodic memory with embedding.'''
emb = self.get_embedding(conversation)
doc_id = f"ep_{datetime.now().timestamp()}"
self.episodic.add(
ids=[doc_id],
embeddings=[emb],
documents=[conversation],
metadatas=[{"timestamp": datetime.now().isoformat(), **(metadata or {})}],
)
def retrieve_episodic(self, query: str, n: int = 3) -> list[str]:
'''Find relevant past conversations for current context.'''
if self.episodic.count() == 0:
return []
results = self.episodic.query(query_embeddings=[self.get_embedding(query)], n_results=min(n, self.episodic.count()))
return results["documents"][0]
def store_fact(self, key: str, value: str) -> None:
'''Store a semantic fact about the user or domain.'''
self.semantic_facts[key] = {"value": value, "timestamp": datetime.now().isoformat()}
def recall_facts(self, keys: list[str]) -> dict:
'''Recall specific facts.'''
return {k: self.semantic_facts[k]["value"] for k in keys if k in self.semantic_facts}
def add_to_working(self, role: str, content: str) -> None:
self.working_memory.append({"role": role, "content": content})
if len(self.working_memory) > self.max_working_memory * 2:
# Consolidate oldest exchanges into episodic before dropping
oldest = self.working_memory[:4]
conversation = "
".join(f"{m['role']}: {m['content']}" for m in oldest)
self.add_episodic(conversation)
self.working_memory = self.working_memory[4:]
def build_context(self, current_query: str) -> list[dict]:
'''Build an augmented context by combining all memory types.'''
past_episodes = self.retrieve_episodic(current_query)
facts = self.recall_facts(list(self.semantic_facts.keys()))
system_additions = []
if past_episodes:
system_additions.append("=== Relevant Past Conversations ===
" + "
".join(past_episodes))
if facts:
system_additions.append("=== Known Facts About User ===
" + json.dumps(facts, indent=2))
messages = [{"role": "system", "content": "You are a persistent AI assistant.
" + "
".join(system_additions)}]
messages.extend(self.working_memory)
messages.append({"role": "user", "content": current_query})
return messages
# Usage: seed semantic facts, then build an augmented prompt context.
# (Requires a running OpenAI client and Chroma instance from the setup above.)
memory = AgentMemorySystem("alice_session_1")
memory.store_fact("user_name", "Alice")
memory.store_fact("preferred_language", "Python")
memory.store_fact("current_project", "building a recommendation system")

context = memory.build_context("How should I implement the user similarity calculation?")
print(f"Context has {len(context)} messages")
print(f"System message length: {len(context[0]['content'])} chars")
Tip
Practice Agentic Memory Persistent Context Management in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Use Python's tracemalloc module (or a memory profiler such as memray) to detect memory growth in long-running agent processes.
Practice Task
Note
Practice Task — (1) Write a working example of Agentic Memory Persistent Context Management from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with Agentic Memory — Persistent Context Management is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.