Use this file to discover all available pages before exploring further.
Zep memory APIs provide recent messages as well as compressed context summaries, giving agents efficient access to both short-term and long-term memory.
from datetime import datetime, timedelta


def get_windowed_memory(session_id: str, hours: int = 24):
    """Get messages from a specific time window plus the session context.

    Args:
        session_id: Identifier of the session to read memory for.
        hours: Size of the look-back window for messages, in hours.

    Returns:
        A ``(messages, context)`` tuple: the result of ``get_messages``
        filtered with ``created_after=cutoff``, and the result of
        ``get_context`` for the same session.
    """
    # NOTE(review): datetime.now() is naive local time -- confirm the client
    # does not expect a UTC / timezone-aware cutoff.
    cutoff = datetime.now() - timedelta(hours=hours)

    # Recent messages within the time window.
    messages = zep_client.memory.get_messages(
        session_id=session_id,
        created_after=cutoff,
    )

    # Context is requested for the session as a whole: no time bound is
    # passed here, so it is intended to cover history older than the window
    # but is not explicitly limited to it.
    context = zep_client.memory.get_context(session_id=session_id)

    return messages, context


agent = Agent(
    name="Windowed Memory Agent",
    instructions="Use 24-hour message window with historical context",
)
def get_token_limited_memory(
    session_id: str,
    max_tokens: int = 2000,
    tokens_per_message: int = 50,
    min_messages: int = 5,
):
    """Balance messages vs. context based on a token budget.

    The context summary is budgeted first; whatever remains of
    ``max_tokens`` is converted into a message count.

    Args:
        session_id: Identifier of the session to read memory for.
        max_tokens: Total token budget shared by context and messages.
        tokens_per_message: Assumed average size of one message, used to
            turn the remaining budget into a message limit.
        min_messages: Lower bound on the message limit. It applies even
            when the context summary alone exceeds ``max_tokens``.

    Returns:
        A ``(messages, context)`` tuple.

    Raises:
        ValueError: If ``tokens_per_message`` is not positive.
    """
    if tokens_per_message <= 0:
        raise ValueError("tokens_per_message must be positive")

    context = zep_client.memory.get_context(session_id=session_id)
    context_tokens = estimate_tokens(context.summary)

    # Reserve tokens for context, use the remainder for messages. The
    # remainder may be negative; the floor below absorbs that case.
    available_tokens = max_tokens - context_tokens
    message_limit = available_tokens // tokens_per_message

    messages = zep_client.memory.get_messages(
        session_id=session_id,
        limit=max(message_limit, min_messages),
    )

    return messages, context


def estimate_tokens(text: str) -> int:
    """Rough token estimation (4 chars ≈ 1 token).

    Returns 0 for any falsy input (empty string or ``None``).
    """
    return len(text) // 4 if text else 0
def deduplicated_memory(session_id: str, recent_count: int = 3):
    """Handle overlapping content between messages and context.

    The contents of the last ``recent_count`` messages are joined with
    single spaces; if that exact text appears in the context summary, those
    trailing messages are dropped from the returned messages.

    Args:
        session_id: Identifier of the session to read memory for.
        recent_count: How many trailing messages to compare against the
            summary. A value of 0 or less disables deduplication.

    Returns:
        A ``(messages, context)`` tuple.
    """
    messages = zep_client.memory.get_messages(session_id=session_id)
    context = zep_client.memory.get_context(session_id=session_id)

    if recent_count <= 0:
        return messages, context

    recent_content = " ".join(msg.content for msg in messages[-recent_count:])

    # An empty or whitespace-only string is a substring of almost any
    # summary, so only run the overlap check when there is real text.
    has_recent_text = bool(recent_content.strip())
    if has_recent_text and context.summary and recent_content in context.summary:
        # Context already includes the recent messages; keep older ones only.
        messages = messages[:-recent_count]

    return messages, context
Chat Applications: Use time window strategy (24-48 hours)
Task-Oriented Agents: Use token-limited strategy with higher message priority
Long-Running Sessions: Use smart deduplication to avoid redundancy
Real-Time Systems: Always fetch messages first and use context only as a fallback
Error Handling
Implement robust fallbacks:
def robust_memory_retrieval(session_id: str):
    """Fail gracefully when Zep is unavailable.

    Args:
        session_id: Identifier of the session to read memory for.

    Returns:
        ``(messages, context)`` from Zep when both calls succeed; otherwise
        whatever ``get_fallback_memory(session_id)`` returns.
    """
    try:
        messages = zep_client.memory.get_messages(session_id=session_id)
        context = zep_client.memory.get_context(session_id=session_id)
    except Exception as e:
        # Deliberately broad: any retrieval failure should degrade to the
        # fallback path instead of propagating to the caller.
        logger.warning("Zep retrieval failed: %s", e)
        # Fallback to local cache or simplified memory.
        return get_fallback_memory(session_id)

    return messages, context
Performance Optimization
Optimize for your deployment:
Batch Operations: Retrieve memory for multiple sessions at once
Caching: Cache context summaries that don’t change frequently
Async Operations: Use async Zep client for better throughput
Monitoring: Track summary lag and adjust strategies accordingly
Testing Memory Integration
Validate your memory strategy:
def test_memory_consistency(session_id: str):
    """Test that recent information isn't lost to summary lag.

    Writes a uniquely timestamped message, reads memory back straight away,
    and asserts the message is among the returned messages.

    Args:
        session_id: Identifier of the session to write to and read from.
    """
    # Add a test message; the timestamp makes its content unique per run.
    test_content = f"Test message at {datetime.now()}"
    zep_client.memory.add_message(session_id, "user", test_content)

    # Immediately retrieve memory; the context half is not needed here.
    messages, _ = get_memory_context(session_id, "test_user")

    # Verify the test message is in the recent messages.
    recent_content = [msg.content for msg in messages]
    assert test_content in recent_content, "Recent message lost to summary lag"