LLM Configuration
This page provides comprehensive documentation for configuring Large Language Models (LLMs) in PraisonAI, including retry mechanisms, timeout settings, custom headers, and advanced optimization options.Core LLM Configuration
Basic Setup
Copy
from praisonaiagents import Agent
agent = Agent(
name="Assistant",
llm="gpt-4o",
llm_config={
"temperature": 0.7,
"max_tokens": 4000,
"timeout": 60,
"api_key": "your-api-key"
}
)
Provider-Specific Configuration
Copy
# OpenAI Configuration
openai_config = {
"model": "gpt-4o",
"api_key": "sk-...",
"organization": "org-...",
"base_url": "https://api.openai.com/v1",
"timeout": 60,
"max_retries": 3,
"temperature": 0.7,
"max_tokens": 4000,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
# Anthropic Configuration
anthropic_config = {
"model": "claude-3-sonnet-20240229",
"api_key": "sk-ant-...",
"base_url": "https://api.anthropic.com",
"timeout": 90,
"max_retries": 3,
"temperature": 0.7,
"max_tokens": 4000,
"anthropic_version": "2023-06-01"
}
# Custom/Local LLM Configuration
custom_config = {
"model": "custom-model",
"base_url": "http://localhost:8000",
"timeout": 120,
"headers": {
"Authorization": "Bearer custom-token"
}
}
Retry Logic Configuration
Basic Retry Settings
Copy
retry_config = {
"max_retries": 3,
"retry_delay": 2.0, # seconds
"retry_multiplier": 2.0, # exponential backoff multiplier
"max_retry_delay": 30.0, # maximum delay between retries
"retry_on_status": [429, 500, 502, 503, 504], # HTTP status codes
"retry_on_errors": [
"RateLimitError",
"APIConnectionError",
"Timeout",
"ServiceUnavailableError"
]
}
Advanced Retry Logic
Copy
advanced_retry_config = {
"retry_strategy": "exponential_backoff_with_jitter",
"max_retries": 5,
"base_delay": 1.0,
"max_delay": 60.0,
"jitter": 0.1, # 10% randomization
# Error-specific retry behavior
"error_retry_config": {
"RateLimitError": {
"max_retries": 10,
"base_delay": 5.0,
"respect_retry_after": True
},
"APIConnectionError": {
"max_retries": 3,
"base_delay": 2.0,
"increase_timeout": True
},
"InsufficientQuotaError": {
"max_retries": 0, # Don't retry
"fallback_model": "gpt-3.5-turbo"
}
},
# Circuit breaker configuration
"circuit_breaker": {
"enabled": True,
"failure_threshold": 5,
"recovery_timeout": 60,
"half_open_requests": 2
}
}
Custom Retry Logic Implementation
Copy
def custom_retry_handler(error, attempt, config):
"""Custom retry logic for specific scenarios"""
if isinstance(error, RateLimitError):
# Extract retry-after header if available
retry_after = error.response.headers.get('retry-after', 60)
return min(retry_after, config['max_delay'])
elif isinstance(error, ModelOverloadedError):
# Switch to a different model
config['fallback_model'] = "gpt-3.5-turbo"
return config['base_delay'] * (2 ** attempt)
else:
# Default exponential backoff
return min(
config['base_delay'] * (config['retry_multiplier'] ** attempt),
config['max_delay']
)
llm_config = {
"retry_handler": custom_retry_handler,
"max_retries": 5
}
Timeout Configuration
Timeout Settings
Copy
timeout_config = {
# Basic timeout
"timeout": 60, # seconds
# Detailed timeout configuration
"timeout_config": {
"connect": 5.0, # Connection timeout
"read": 60.0, # Read timeout
"write": 10.0, # Write timeout
"pool": 5.0 # Connection pool timeout
},
# Dynamic timeout based on request
"dynamic_timeout": {
"base": 30,
"per_token": 0.01, # Additional time per token
"min": 10,
"max": 300
},
# Timeout retry behavior
"timeout_retry": {
"increase_factor": 1.5, # Increase timeout on retry
"max_timeout": 300
}
}
Request-Specific Timeouts
Copy
# Configure timeouts based on operation type
operation_timeouts = {
"completion": {
"timeout": 60,
"dynamic": True,
"factors": {
"max_tokens": 0.01,
"temperature": 1.2 # Higher temperature = more time
}
},
"embedding": {
"timeout": 30,
"batch_factor": 0.1 # Per item in batch
},
"chat": {
"timeout": 90,
"message_factor": 5 # Per message in history
}
}
Custom Headers Configuration
Basic Headers
Copy
headers_config = {
"headers": {
"Authorization": "Bearer your-api-key",
"Content-Type": "application/json",
"User-Agent": "PraisonAI/1.0",
"X-Custom-Header": "custom-value"
}
}
Dynamic Headers
Copy
import uuid
def generate_headers(request_type, model, **kwargs):
"""Generate headers dynamically based on request"""
headers = {
"User-Agent": f"PraisonAI/1.0 ({request_type})",
"X-Model": model,
"X-Request-ID": str(uuid.uuid4()),
"X-Client-Version": "1.0.0"
}
# Add authentication
if api_key := kwargs.get('api_key'):
headers["Authorization"] = f"Bearer {api_key}"
# Add custom headers for specific providers
if "anthropic" in model:
headers["anthropic-version"] = "2023-06-01"
elif "openai" in model:
headers["OpenAI-Beta"] = "assistants=v1"
return headers
llm_config = {
"headers_generator": generate_headers,
"static_headers": {
"X-Environment": "production"
}
}
Provider-Specific Headers
Copy
import time
# OpenAI specific headers
openai_headers = {
"OpenAI-Organization": "org-xxx",
"OpenAI-Beta": "assistants=v1",
"X-Request-ID": "unique-request-id"
}
# Anthropic specific headers
anthropic_headers = {
"anthropic-version": "2023-06-01",
"X-Request-Source": "praisonai"
}
# Custom authentication headers
custom_auth_headers = {
"X-API-Key": "your-api-key",
"X-API-Secret": "your-secret",
"X-Timestamp": str(int(time.time())),
"X-Signature": "generated-signature"
}
Advanced LLM Configuration
Load Balancing
Copy
load_balancing_config = {
"strategy": "round_robin", # or "least_latency", "weighted"
"endpoints": [
{
"url": "https://api.openai.com/v1",
"weight": 0.6,
"models": ["gpt-4o", "gpt-3.5-turbo"]
},
{
"url": "https://api.anthropic.com",
"weight": 0.4,
"models": ["claude-3-sonnet"]
}
],
"health_check": {
"enabled": True,
"interval": 60,
"timeout": 5,
"failure_threshold": 3
}
}
Model Fallback Configuration
Copy
fallback_config = {
"primary_model": "gpt-4o",
"fallback_chain": [
{
"model": "gpt-4-turbo",
"condition": "rate_limit",
"max_attempts": 2
},
{
"model": "gpt-3.5-turbo",
"condition": "any_error",
"temperature_adjustment": -0.2 # More deterministic
},
{
"model": "claude-3-sonnet",
"condition": "repeated_failure",
"provider_switch": True
}
],
"fallback_strategy": "progressive", # or "immediate"
"preserve_context": True
}
Request Optimization
Copy
optimization_config = {
# Request batching
"batching": {
"enabled": True,
"max_batch_size": 10,
"batch_timeout": 0.1, # seconds
"dynamic_batching": True
},
# Response streaming
"streaming": {
"enabled": True,
"chunk_size": 100,
"buffer_size": 1000,
"timeout_per_chunk": 30
},
# Caching
"cache": {
"enabled": True,
"ttl": 3600,
"max_size": 1000,
"key_strategy": "semantic", # or "exact"
"similarity_threshold": 0.95
},
# Token optimization
"token_optimization": {
"compress_prompts": True,
"remove_redundancy": True,
"dynamic_max_tokens": True,
"reserve_completion_tokens": 500
}
}
Rate Limiting Configuration
Copy
rate_limit_config = {
"rate_limits": {
"requests_per_minute": 60,
"tokens_per_minute": 90000,
"requests_per_day": 10000
},
"rate_limit_strategy": "adaptive", # or "fixed", "burst"
"burst_config": {
"burst_size": 10,
"refill_rate": 1.0 # per second
},
"quota_management": {
"track_usage": True,
"warn_at_percentage": 80,
"hard_limit_behavior": "queue" # or "reject", "fallback"
}
}
Complete Configuration Example
Copy
from praisonaiagents import Agent
# Comprehensive LLM configuration
agent = Agent(
name="ProductionAgent",
llm="gpt-4o",
llm_config={
# Model settings
"temperature": 0.7,
"max_tokens": 4000,
"top_p": 0.9,
"presence_penalty": 0.1,
"frequency_penalty": 0.1,
# Timeout configuration
"timeout": 60,
"timeout_config": {
"connect": 5,
"read": 60,
"dynamic": True
},
# Retry configuration
"max_retries": 5,
"retry_delay": 2.0,
"retry_multiplier": 2.0,
"retry_on_status": [429, 500, 502, 503],
# Headers
"headers": {
"User-Agent": "PraisonAI/1.0",
"X-Request-Source": "production"
},
# Advanced features
"streaming": True,
"cache_enabled": True,
"fallback_models": ["gpt-3.5-turbo"],
# Rate limiting
"rate_limit_config": {
"requests_per_minute": 60,
"adaptive": True
}
}
)
Environment Variables
Copy
# Basic LLM settings
export OPENAI_API_KEY="sk-..."
export OPENAI_MODEL="gpt-4o"
export OPENAI_TEMPERATURE="0.7"
# Timeout settings
export PRAISONAI_LLM_TIMEOUT="60"
export PRAISONAI_LLM_CONNECT_TIMEOUT="5"
export PRAISONAI_LLM_READ_TIMEOUT="60"
# Retry settings
export PRAISONAI_LLM_MAX_RETRIES="3"
export PRAISONAI_LLM_RETRY_DELAY="2"
export PRAISONAI_LLM_RETRY_MULTIPLIER="2"
# Headers
export PRAISONAI_LLM_USER_AGENT="PraisonAI/1.0"
export PRAISONAI_LLM_CUSTOM_HEADERS='{"X-Custom": "value"}'
# Advanced settings
export PRAISONAI_LLM_STREAMING="true"
export PRAISONAI_LLM_CACHE_ENABLED="true"
export PRAISONAI_LLM_RATE_LIMIT="60"
Monitoring and Debugging
Copy
monitoring_config = {
"logging": {
"log_requests": True,
"log_responses": True,
"log_level": "INFO",
"sanitize_keys": ["api_key", "authorization"]
},
"metrics": {
"track_latency": True,
"track_tokens": True,
"track_costs": True,
"export_interval": 60
},
"debugging": {
"capture_raw_responses": False,
"validate_responses": True,
"break_on_error": False
}
}
See Also
- Model Configuration - Supported models and providers
- Agent Configuration - Agent-level LLM settings
- Best Practices - LLM configuration guidelines

