Agentic Memory — Persistent Context Management
Long-running agents face the context window problem: you can't fit months of interactions in a single prompt. Effective agents maintain different tiers of memory: working memory (current context), episodic memory (past conversations, stored in vector DB), semantic memory (facts), and procedural memory (learned skills).
Hierarchical Agent Memory
import chromadb
from openai import OpenAI
import json
from datetime import datetime
# Module-level clients shared by AgentMemorySystem below.
client = OpenAI()  # OpenAI client -- used only for embedding calls here
chroma = chromadb.Client()  # in-process Chroma vector store (ephemeral by default)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# FOUR TYPES OF AGENT MEMORY
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Reference table: the four memory tiers a long-running agent maintains.
# (Documentation data only -- the class below implements the first three.)
memory_types = {
    "Working Memory": "Current context window -- what's actively in the LLM prompt. Limited by context window size.",
    "Episodic Memory": "Past conversations and experiences -- stored in vector DB, retrieved by similarity.",
    "Semantic Memory": "Facts and knowledge -- user preferences, domain facts, entity info. Key-value or graph.",
    "Procedural Memory": "Skills and workflows -- how to do tasks, successful tool call patterns. Retrieved by task type.",
}
class AgentMemorySystem:
'''Hierarchical memory for long-running AI agents.'''
def __init__(self, agent_id: str):
self.agent_id = agent_id
# Episodic: stores conversations, searchable by semantic similarity
self.episodic = chroma.create_collection(f"episodic_{agent_id}")
# Semantic: stores facts (key-value style)
self.semantic_facts = {} # in production: Redis or DynamoDB
# Working memory: recent conversation history (context window)
self.working_memory: list[dict] = []
self.max_working_memory = 10 # last 10 exchanges
def get_embedding(self, text: str) -> list[float]:
resp = client.embeddings.create(input=text, model="text-embedding-3-small")
return resp.data[0].embedding
def add_episodic(self, conversation: str, metadata: dict = None) -> None:
'''Store a conversation in episodic memory with embedding.'''
emb = self.get_embedding(conversation)
doc_id = f"ep_{datetime.now().timestamp()}"
self.episodic.add(
ids=[doc_id],
embeddings=[emb],
documents=[conversation],
metadatas=[{"timestamp": datetime.now().isoformat(), **(metadata or {})}],
)
def retrieve_episodic(self, query: str, n: int = 3) -> list[str]:
'''Find relevant past conversations for current context.'''
if self.episodic.count() == 0:
return []
results = self.episodic.query(query_embeddings=[self.get_embedding(query)], n_results=min(n, self.episodic.count()))
return results["documents"][0]
def store_fact(self, key: str, value: str) -> None:
'''Store a semantic fact about the user or domain.'''
self.semantic_facts[key] = {"value": value, "timestamp": datetime.now().isoformat()}
def recall_facts(self, keys: list[str]) -> dict:
'''Recall specific facts.'''
return {k: self.semantic_facts[k]["value"] for k in keys if k in self.semantic_facts}
def add_to_working(self, role: str, content: str) -> None:
self.working_memory.append({"role": role, "content": content})
if len(self.working_memory) > self.max_working_memory * 2:
# Consolidate oldest exchanges into episodic before dropping
oldest = self.working_memory[:4]
conversation = "
".join(f"{m['role']}: {m['content']}" for m in oldest)
self.add_episodic(conversation)
self.working_memory = self.working_memory[4:]
def build_context(self, current_query: str) -> list[dict]:
'''Build an augmented context by combining all memory types.'''
past_episodes = self.retrieve_episodic(current_query)
facts = self.recall_facts(list(self.semantic_facts.keys()))
system_additions = []
if past_episodes:
system_additions.append("=== Relevant Past Conversations ===
" + "
".join(past_episodes))
if facts:
system_additions.append("=== Known Facts About User ===
" + json.dumps(facts, indent=2))
messages = [{"role": "system", "content": "You are a persistent AI assistant.
" + "
".join(system_additions)}]
messages.extend(self.working_memory)
messages.append({"role": "user", "content": current_query})
return messages
# Usage: seed semantic facts, then build an augmented prompt context.
# (Requires a running OpenAI client and Chroma instance from the setup above.)
memory = AgentMemorySystem("alice_session_1")
memory.store_fact("user_name", "Alice")
memory.store_fact("preferred_language", "Python")
memory.store_fact("current_project", "building a recommendation system")

context = memory.build_context("How should I implement the user similarity calculation?")
print(f"Context has {len(context)} messages")
print(f"System message length: {len(context[0]['content'])} chars")
Tip
Practice Agentic Memory Persistent Context Management in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Use Python's tracemalloc module (or a memory profiler such as memray) to detect memory growth in long-running agent processes.
Practice Task
Note
Practice Task — (1) Write a working example of Agentic Memory Persistent Context Management from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with Agentic Memory — Persistent Context Management is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.