Skip to main content

Configuration Best Practices

This guide provides comprehensive best practices for configuring PraisonAI components, ensuring optimal performance, reliability, and maintainability of your AI agent systems.

General Configuration Principles

1. Start Simple, Iterate Gradually

# ❌ Avoid: Over-engineering from the start
# NOTE(review): `Agent` is not defined or imported in this snippet — presumably
# the PraisonAI agent class; confirm the import against the library docs.
agent = Agent(
    name="ComplexAgent",
    max_iter=100,
    max_retry_limit=10,
    context_length=200000,
    # ... 50 more parameters
)

# ✅ Better: Start simple and add complexity as needed
agent = Agent(
    name="SimpleAgent",
    role="Assistant",
    llm="gpt-4o-mini"  # Start with efficient model
)

# Add features incrementally based on requirements
# NOTE(review): `production_environment` and `safety_rules` are placeholders
# that this snippet does not define.
if production_environment:
    agent.max_retry_limit = 3
    agent.add_guardrails(safety_rules)

2. Use Environment-Specific Configurations

# ✅ Good: Environment-aware configuration
import os

def get_agent_config():
    """Build the agent settings for the current deployment environment.

    The environment name is read from the ``ENVIRONMENT`` variable and
    defaults to ``"development"``. Any name other than ``"production"`` or
    ``"staging"`` receives the development overrides.

    Returns:
        A new dict holding the shared base settings followed by the
        overrides for the selected environment.
    """
    overrides_by_env = {
        "production": {
            "max_retry_limit": 5,
            "timeout": 60,
            "guardrails": "strict",
            "logging": "info",
        },
        "staging": {
            "max_retry_limit": 3,
            "timeout": 120,
            "logging": "debug",
        },
    }
    # Used for "development" and for every unrecognised environment name.
    development_overrides = {
        "max_retry_limit": 1,
        "timeout": 300,
        "logging": "debug",
        "verbose": True,
    }

    env_name = os.getenv("ENVIRONMENT", "development")
    selected = overrides_by_env.get(env_name, development_overrides)

    return {
        "name": "Assistant",
        "llm": "gpt-4o-mini",
        "temperature": 0.7,
        **selected,
    }

3. Centralize Configuration Management

# ✅ Good: Centralized configuration
# config/agent_config.yaml
# The bare string below shows the intended contents of that YAML file; as a
# string-literal expression statement it has no effect when this snippet runs.
"""
defaults:
  timeout: 60
  max_retries: 3
  temperature: 0.7

agents:
  researcher:
    role: "Research Specialist"
    max_iter: 20
    tools: ["web_search", "arxiv", "wikipedia"]
    
  analyst:
    role: "Data Analyst"
    max_iter: 15
    tools: ["calculator", "data_viz", "statistics"]
"""

# config_loader.py
import yaml

class ConfigManager:
    """Load a YAML agent configuration and merge defaults with per-agent values.

    The loaded document is expected to be a mapping with optional
    ``defaults`` and ``agents`` sections; the parsed result is kept on
    ``self.config``.
    """

    def __init__(self, config_path="config/agent_config.yaml"):
        """Read and parse the YAML file at *config_path*.

        Raises:
            OSError: if the file cannot be opened.
        """
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; normalise to a
            # dict so the lookups in get_agent_config never hit None.
            self.config = yaml.safe_load(f) or {}

    def get_agent_config(self, agent_type):
        """Return the settings for *agent_type* layered over the defaults.

        Agent-specific values win over defaults on key collisions. An
        unknown *agent_type* yields just the defaults. Sections that are
        missing or present-but-empty (``defaults:`` with no value parses to
        None) are treated as empty mappings.
        """
        defaults = self.config.get("defaults") or {}
        agents = self.config.get("agents") or {}
        specific = agents.get(agent_type) or {}
        return {**defaults, **specific}

Agent Configuration Best Practices

1. Optimize Iteration and Retry Limits

# Task complexity guidelines
# Maps a complexity label ("simple", "moderate", "complex") to the keyword
# settings used when building an agent for a task of that complexity.
# NOTE(review): the unit of "timeout" is not stated here — presumably seconds.
TASK_COMPLEXITY_SETTINGS = {
    "simple": {
        "max_iter": 5,
        "max_retry_limit": 1,
        "timeout": 30
    },
    "moderate": {
        "max_iter": 15,
        "max_retry_limit": 3,
        "timeout": 60
    },
    "complex": {
        "max_iter": 30,
        "max_retry_limit": 5,
        "timeout": 120
    }
}

def create_agent_for_task(task_type, complexity="moderate"):
    """Create an ``Agent`` named ``"<task_type>_agent"`` for a complexity level.

    The entry of ``TASK_COMPLEXITY_SETTINGS`` selected by *complexity* is
    passed to ``Agent`` as keyword arguments.

    Raises:
        KeyError: if *complexity* is not a key of ``TASK_COMPLEXITY_SETTINGS``.
    """
    agent_kwargs = TASK_COMPLEXITY_SETTINGS[complexity]
    agent_name = f"{task_type}_agent"
    return Agent(name=agent_name, **agent_kwargs)

2. Configure Context Windows Wisely

# ✅ Good: Dynamic context window management
def calculate_context_window(task, model="gpt-4o"):
    """Return the number of context tokens to allot to *task* on *model*.

    Space for the model's output is always reserved first. Long-context
    tasks get everything that remains; other tasks are capped at 50000
    tokens for efficiency.

    Args:
        task: object exposing a truthy/falsy ``requires_long_context``
            attribute.
        model: key into the built-in model limit table.

    Returns:
        The context window size in tokens.

    Raises:
        KeyError: if *model* is not in the limit table.
    """
    MODEL_LIMITS = {
        "gpt-4o": 128000,
        "gpt-3.5-turbo": 16385,
        "claude-3": 200000
    }

    # Reserve space for output
    output_reserve = 2000

    # Apply the reserve on every path: previously only the long-context
    # branch subtracted it, so models smaller than the 50000 cap were handed
    # their entire window with no room left for the response.
    usable = MODEL_LIMITS[model] - output_reserve

    # Calculate based on task needs
    if task.requires_long_context:
        return usable

    # Use smaller window for efficiency
    return min(50000, usable)

3. Implement Graceful Degradation

# ✅ Good: Fallback configuration
# Example fallback settings: a primary model, a list of fallback models each
# tagged with the error kinds that select it, and degradation switches.
# NOTE(review): the code that consumes this dict is not shown. If the chain is
# evaluated in list order, the "any" entry would presumably also match
# "api_down" before the local_llm entry is reached — confirm the intended order.
fallback_config = {
    "primary_model": "gpt-4o",
    "fallback_chain": [
        {"model": "gpt-4-turbo", "on_error": ["rate_limit"]},
        {"model": "gpt-3.5-turbo", "on_error": ["any"]},
        {"model": "local_llm", "on_error": ["api_down"]}
    ],
    "degradation_strategy": {
        "reduce_context": True,
        "simplify_prompts": True,
        "disable_tools": ["expensive_api_tool"]
    }
}

Task Configuration Best Practices

1. Design Clear Task Dependencies

# ✅ Good: Explicit task flow
# Example task graph keyed by task name. "normal" tasks list their successors
# in "next_tasks"; the "decision" task maps the outcome of its condition
# ("true"/"false") to a successor instead.
# NOTE(review): data_cleaning routes back to data_validation, forming a loop;
# "max_attempts" presumably bounds it — confirm against the workflow engine.
task_flow = {
    "data_collection": {
        "type": "normal",
        "next_tasks": ["data_validation"],
        "timeout": 60
    },
    "data_validation": {
        "type": "decision",
        "condition": {
            "field": "data_quality",
            "operator": ">=",
            "value": 0.8
        },
        "next_tasks": {
            "true": "data_analysis",
            "false": "data_cleaning"
        }
    },
    "data_cleaning": {
        "type": "normal",
        "next_tasks": ["data_validation"],  # Loop back
        "max_attempts": 3
    },
    "data_analysis": {
        "type": "normal",
        "is_terminal": True
    }
}

2. Use Conditional Logic Sparingly

# ❌ Avoid: Overly complex conditions
# Deliberate anti-example: compound conditions nested inside compound
# conditions. Each `[...]` is a list holding the Ellipsis literal, used here
# as a placeholder for further nested conditions.
complex_condition = {
    "type": "compound",
    "operator": "AND",
    "conditions": [
        {"type": "compound", "operator": "OR", "conditions": [...]},
        {"type": "compound", "operator": "AND", "conditions": [...]},
        # ... nested 5 levels deep
    ]
}

# ✅ Better: Simple, readable conditions
def evaluate_task_condition(task_output):
    """Classify *task_output* by its quality and completeness scores.

    Quality is checked first: a ``quality_score`` below 0.8 yields
    ``"needs_improvement"`` without looking at completeness. Otherwise a
    ``completeness`` below 0.9 yields ``"needs_completion"``, and anything
    else is ``"ready_for_review"``.
    """
    if task_output.quality_score < 0.8:
        return "needs_improvement"
    if task_output.completeness < 0.9:
        return "needs_completion"
    return "ready_for_review"


# Function-typed condition that delegates the decision to the evaluator above.
simple_condition = dict(type="function", function=evaluate_task_condition)

Memory Configuration Best Practices

1. Choose the Right Memory Provider

# Decision matrix for memory providers
def select_memory_provider(requirements):
    """Pick a memory provider configuration from a requirements mapping.

    The flags are tested in priority order — ``graph_relationships``,
    ``simple_storage``, ``cloud_sync`` — and the first truthy one decides
    the result. With no flag set, the embedding-backed ``rag`` provider is
    returned.

    Args:
        requirements: mapping supporting ``.get(flag)``.

    Returns:
        A new dict describing the chosen provider.
    """
    # Ordered (flag, configuration) rules; earlier rules take precedence.
    prioritized_rules = (
        ("graph_relationships", {"provider": "neo4j", "graph_enabled": True}),
        ("simple_storage", {"provider": "rag", "use_embedding": False}),
        ("cloud_sync", {"provider": "mem0", "sync_enabled": True}),
    )

    for flag, provider_config in prioritized_rules:
        if requirements.get(flag):
            return provider_config

    return {"provider": "rag", "use_embedding": True}

2. Optimize Quality Thresholds

# ✅ Good: Adaptive quality thresholds
def get_quality_threshold(context):
    """Compute a quality threshold adapted to the flags set in *context*.

    Starting from 0.7, every truthy flag in *context* adds its adjustment
    (``critical_task`` +0.15, ``high_precision`` +0.10, ``exploratory``
    -0.20, ``speed_priority`` -0.15). The result is clamped to the range
    0.3..0.95.

    Args:
        context: mapping supporting ``.get(flag)``.
    """
    lower_bound, upper_bound = 0.3, 0.95
    flag_deltas = (
        ("critical_task", 0.15),
        ("high_precision", 0.10),
        ("exploratory", -0.20),
        ("speed_priority", -0.15),
    )

    threshold = 0.7
    # Deltas are applied one at a time, in the order listed above.
    for flag, delta in flag_deltas:
        if context.get(flag):
            threshold = threshold + delta

    if threshold > upper_bound:
        return upper_bound
    if threshold < lower_bound:
        return lower_bound
    return threshold

3. Implement Memory Lifecycle Management

# ✅ Good: Memory lifecycle configuration
# Example memory lifecycle settings: per-tier retention rules plus a
# quality-based cleanup strategy. Per the inline comments the "ttl" and
# "archive_after" values are second counts (86400 = 1 day).
# NOTE(review): how "keep_top_percent" and "min_quality_score" combine is not
# shown here — confirm against the consumer of this dict.
memory_lifecycle = {
    "retention_policy": {
        "short_term": {
            "ttl": 86400,  # 1 day
            "cleanup": "hourly"
        },
        "long_term": {
            "ttl": 2592000,  # 30 days
            "archive_after": 604800,  # 7 days
            "compression": True
        },
        "permanent": {
            "criteria": ["high_value", "reference"],
            "backup": True
        }
    },
    "cleanup_strategy": {
        "method": "quality_based",
        "keep_top_percent": 20,
        "min_quality_score": 0.5
    }
}

LLM Configuration Best Practices

1. Implement Smart Retry Logic

# ✅ Good: Intelligent retry configuration
import random

class SmartRetryConfig:
    """Compute retry delays using a strategy chosen per error type.

    ``error_strategies`` maps an error class name to the bound method that
    computes the delay for it. Every handler takes ``(error, attempt)`` and
    returns a delay that never exceeds ``max_delay``.
    """

    def __init__(self):
        """Set the base and maximum delays and register the strategies."""
        self.base_delay = 1.0
        self.max_delay = 60.0
        self.error_strategies = {
            "RateLimitError": self.handle_rate_limit,
            "TimeoutError": self.handle_timeout,
            "APIError": self.handle_api_error
        }

    def handle_rate_limit(self, error, attempt):
        """Return the delay requested by the error's ``Retry-After`` header.

        The header is read from ``error.headers`` and defaults to 60 when
        absent. The result is clamped to the range 0..``max_delay``.
        """
        # Respect rate limit headers. Errors without a `headers` attribute
        # are treated as having no headers instead of raising AttributeError.
        headers = getattr(error, "headers", None) or {}
        raw_retry_after = headers.get("Retry-After", 60)
        try:
            # Header values arrive as strings; comparing a str with a float
            # in min() raises TypeError, so convert before clamping.
            retry_after = float(raw_retry_after)
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP-date, which is not parsed
            # here; wait the maximum delay rather than retrying too early.
            retry_after = self.max_delay
        return min(max(retry_after, 0.0), self.max_delay)

    def handle_timeout(self, error, attempt):
        """Return an exponential backoff delay with up to 10% added jitter."""
        # Exponential backoff with jitter
        delay = self.base_delay * (2 ** attempt)
        jitter = random.uniform(0, delay * 0.1)
        return min(delay + jitter, self.max_delay)

    def handle_api_error(self, error, attempt):
        """Return a delay that grows linearly with *attempt*."""
        # Linear backoff for API errors
        return min(self.base_delay * attempt, self.max_delay)

2. Optimize Token Usage

# ✅ Good: Token optimization strategies
# Example token-usage settings: three strategies (compression, caching,
# truncation) plus usage monitoring.
# NOTE(review): "target_reduction", "similarity_threshold" and
# "alert_threshold" look like fractions in 0..1 — confirm with the consumer.
token_optimization = {
    "strategies": {
        "compression": {
            "enabled": True,
            "method": "semantic",
            "target_reduction": 0.3
        },
        "caching": {
            "cache_prompts": True,
            "cache_completions": True,
            "similarity_threshold": 0.95
        },
        "truncation": {
            "strategy": "sliding_window",
            "preserve": ["recent", "relevant"],
            "max_history": 10
        }
    },
    "monitoring": {
        "track_usage": True,
        "alert_threshold": 0.8,
        "optimize_on_high_usage": True
    }
}

Tool Configuration Best Practices

1. Set Appropriate Timeouts

# ✅ Good: Tool-specific timeout configuration
# Maps a speed profile name to its timeout and the operation types that
# belong to that profile.
# NOTE(review): the timeout unit is not stated here — presumably seconds.
TOOL_TIMEOUT_PROFILES = {
    "instant": {
        "timeout": 5,
        "operations": ["cache_lookup", "validation"]
    },
    "fast": {
        "timeout": 15,
        "operations": ["calculation", "local_search"]
    },
    "standard": {
        "timeout": 30,
        "operations": ["api_call", "web_search"]
    },
    "slow": {
        "timeout": 120,
        "operations": ["ml_inference", "large_download"]
    }
}

def get_tool_timeout(tool_name, operation_type):
    """Look up the timeout for *operation_type* in ``TOOL_TIMEOUT_PROFILES``.

    Profiles are scanned in definition order and the first one whose
    ``"operations"`` list contains *operation_type* supplies the timeout.
    When no profile matches, 30 is returned. *tool_name* is accepted but
    not used by the lookup.
    """
    matching_timeouts = (
        profile["timeout"]
        for profile in TOOL_TIMEOUT_PROFILES.values()
        if operation_type in profile["operations"]
    )
    return next(matching_timeouts, 30)  # Default

2. Implement Resource Pooling

# ✅ Good: Efficient resource pooling
# Example pooling settings for connection pools (http, database) and thread
# pools (io_bound, cpu_bound).
# NOTE(review): the thread-pool "workers" values are strings holding an
# expression ("2 * cpu_count"), not numbers — whatever consumes this dict
# must interpret them; confirm how.
resource_pool_config = {
    "connection_pools": {
        "http": {
            "min_size": 2,
            "max_size": 20,
            "timeout": 30,
            "recycle_after": 3600,
            "health_check": True
        },
        "database": {
            "min_size": 1,
            "max_size": 10,
            "timeout": 60,
            "retry_on_disconnect": True
        }
    },
    "thread_pools": {
        "io_bound": {
            "workers": "2 * cpu_count",
            "queue_size": 100
        },
        "cpu_bound": {
            "workers": "cpu_count",
            "queue_size": 50
        }
    }
}

Performance Optimization Checklist

1. Caching Strategy

# ✅ Comprehensive caching configuration
# Example three-level cache layout (in-memory, disk, distributed) with a key
# strategy and invalidation rules.
# NOTE(review): the units of "size" and "ttl" are not stated here — ttl is
# presumably seconds; confirm with the cache implementation.
caching_strategy = {
    "levels": {
        "l1_memory": {
            "size": 100,
            "ttl": 300,
            "eviction": "lru"
        },
        "l2_disk": {
            "size": 1000,
            "ttl": 3600,
            "compression": True
        },
        "l3_distributed": {
            "provider": "redis",
            "ttl": 86400,
            "sharding": True
        }
    },
    "key_strategy": "semantic_hash",
    "invalidation": {
        "on_update": True,
        "scheduled": "hourly"
    }
}

2. Monitoring and Alerting

# ✅ Comprehensive monitoring setup
# Example monitoring settings: metric names grouped by category, alert rules
# grouped by severity (each with a threshold and a window), and dashboard
# options.
# NOTE(review): the units of "window", "latency_p99" and "update_frequency"
# are not stated here — presumably seconds / milliseconds / seconds; confirm.
monitoring_config = {
    "metrics": {
        "system": ["cpu", "memory", "disk", "network"],
        "application": ["request_rate", "error_rate", "latency"],
        "business": ["task_completion", "quality_scores"]
    },
    "alerts": {
        "critical": {
            "error_rate": {"threshold": 0.05, "window": 300},
            "latency_p99": {"threshold": 5000, "window": 300}
        },
        "warning": {
            "memory_usage": {"threshold": 0.8, "window": 600},
            "queue_depth": {"threshold": 1000, "window": 300}
        }
    },
    "dashboards": {
        "update_frequency": 60,
        "retention": 30  # days
    }
}

Security Best Practices

1. Secure Configuration Storage

# ✅ Good: Secure configuration management
import os
from cryptography.fernet import Fernet

class SecureConfigManager:
    """Read sensitive settings from the environment, decrypting when possible.

    The Fernet key comes from the ``CONFIG_ENCRYPTION_KEY`` environment
    variable. When it is unset, ``cipher`` is None and only plain
    environment variables are served.
    """

    def __init__(self):
        """Load the encryption key and build the cipher if a key is present."""
        encryption_key = os.environ.get("CONFIG_ENCRYPTION_KEY")
        self.key = encryption_key
        if encryption_key:
            self.cipher = Fernet(encryption_key.encode())
        else:
            self.cipher = None

    def get_sensitive_config(self, key):
        """Return the value for *key*, preferring the encrypted variant.

        ``ENCRYPTED_<key>`` is decrypted and returned when it is set and a
        cipher is available; otherwise the plain variable *key* is returned
        (None if that is unset too).
        """
        ciphertext = os.environ.get(f"ENCRYPTED_{key}")
        if not (ciphertext and self.cipher):
            # Fallback to plain env var
            return os.environ.get(key)
        plaintext_bytes = self.cipher.decrypt(ciphertext.encode())
        return plaintext_bytes.decode()

    def validate_config(self, config):
        """Return warnings about string values that look like hardcoded secrets.

        Only string values are inspected. A key containing "password" is
        reported unless its value is the placeholder "***"; a key containing
        "key" is reported when its value is longer than 20 characters.
        """
        findings = []

        for name, setting in config.items():
            if not isinstance(setting, str):
                continue
            lowered_name = name.lower()
            if "password" in lowered_name and setting != "***":
                findings.append(f"Potential hardcoded password in {name}")
            if "key" in lowered_name and len(setting) > 20:
                findings.append(f"Potential hardcoded API key in {name}")

        return findings

2. Implement Least Privilege

# ✅ Good: Role-based configuration
# Example per-role limits: iteration cap, allowed tools and token cap for
# each role. Only "admin" sets "override_guardrails".
# NOTE(review): "all" and "all_except_admin" appear to be sentinel tool names
# rather than real tools — confirm how the consumer expands them.
role_based_config = {
    "roles": {
        "viewer": {
            "max_iter": 5,
            "allowed_tools": ["read_only_search"],
            "max_tokens": 1000
        },
        "user": {
            "max_iter": 15,
            "allowed_tools": ["search", "calculate"],
            "max_tokens": 4000
        },
        "power_user": {
            "max_iter": 30,
            "allowed_tools": ["all_except_admin"],
            "max_tokens": 8000
        },
        "admin": {
            "max_iter": 50,
            "allowed_tools": ["all"],
            "max_tokens": 16000,
            "override_guardrails": True
        }
    }
}

Configuration Testing

1. Validate Configurations

# ✅ Good: Configuration validation
class ConfigValidator:
    """Validate a configuration dict against a key -> expected-type schema.

    Besides the schema-driven type checks, ``validate`` applies two fixed
    rules: ``max_iter`` must lie between 1 and 100, and ``graph_uri`` must
    be set whenever ``use_graph`` is truthy.
    """

    def __init__(self, schema):
        """Store *schema*, a mapping of config key to expected type.

        Each schema value is used as the second argument of ``isinstance``.
        """
        self.schema = schema

    def validate(self, config):
        """Return a list of error messages for *config*; empty when valid.

        Keys named in the schema but absent from *config* are not reported.
        Invalid values are always reported as messages, never raised.
        """
        errors = []

        # Type validation
        for key, expected_type in self.schema.items():
            if key in config:
                if not isinstance(config[key], expected_type):
                    errors.append(f"{key} must be {expected_type}")

        # Range validation
        if "max_iter" in config:
            max_iter = config["max_iter"]
            # Guard the comparison: a non-numeric value (e.g. "15" or None)
            # would make `1 <= value <= 100` raise TypeError and abort
            # validation instead of producing an error entry.
            in_range = isinstance(max_iter, (int, float)) and 1 <= max_iter <= 100
            if not in_range:
                errors.append("max_iter must be between 1 and 100")

        # Dependency validation
        if config.get("use_graph") and not config.get("graph_uri"):
            errors.append("graph_uri required when use_graph is True")

        return errors

# Test configurations
def test_agent_config():
    """Check that a well-formed agent configuration yields no validation errors."""
    schema = {
        "max_iter": int,
        "temperature": float,
        "llm": str,
    }
    sample_config = {
        "max_iter": 15,
        "temperature": 0.7,
        "llm": "gpt-4o",
    }

    problems = ConfigValidator(schema).validate(sample_config)
    assert not problems, f"Configuration errors: {problems}"

Summary Checklist

- Start Simple: Begin with minimal configuration and add complexity as needed.
- Environment-Specific: Use different configurations for dev, staging, and production.
- Centralize Management: Keep configurations in one place for easy management.
- Validate Everything: Implement validation for all configuration values.
- Monitor Performance: Track the impact of configuration changes.
- Document Decisions: Document why specific values were chosen.
- Regular Review: Periodically review and optimize configurations.
- Security First: Never hardcode secrets; use encryption for sensitive data.
- Test Configurations: Validate configurations before deployment.
- Plan for Failure: Always have fallback configurations ready.

See Also