Skip to main content

LLM Configuration

This page provides comprehensive documentation for configuring Large Language Models (LLMs) in PraisonAI, including retry mechanisms, timeout settings, custom headers, and advanced optimization options.

Environment Variable Precedence

PraisonAI resolves LLM configuration from environment variables using a documented precedence order, ensuring consistent behavior across all components.
| Variable | Purpose | Precedence |
| --- | --- | --- |
| MODEL_NAME | Model name (highest priority) | 1 |
| OPENAI_MODEL_NAME | Model name (legacy compat) | 2 |
| OPENAI_BASE_URL | LLM endpoint URL (highest priority) | 1 |
| OPENAI_API_BASE | LLM endpoint URL (legacy compat) | 2 |
| OLLAMA_API_BASE | Ollama endpoint URL | 3 |
| ANTHROPIC_API_KEY | Anthropic API key (for anthropic/* models) | — |
| GOOGLE_API_KEY | Google API key (for google/* models) | — |
| GEMINI_API_KEY | Gemini API key (for gemini/* models) | — |
| GROQ_API_KEY | Groq API key (for groq/* models) | — |
| COHERE_API_KEY | Cohere API key (for cohere/* models) | — |
| OPENROUTER_API_KEY | OpenRouter API key (for openrouter/* models) | — |
| OLLAMA_API_KEY | Ollama API key (for ollama/* models) | — |
| OPENAI_API_KEY | OpenAI API key (for OpenAI models and fallback) | — |
Defaults:
  • Model: gpt-4o-mini
  • Base URL: Provider-specific or https://api.openai.com/v1
  • API Key: None
PraisonAI resolves these once at startup; all internal components use the same values. For the complete environment variable configuration guide, see LLM Endpoint Configuration.

Core LLM Configuration

Basic Setup

from praisonaiagents import Agent

# Python rejects a repeated keyword argument (SyntaxError), so the model name
# is given once, inside the single `llm` dict, using the same "model" key as
# the provider configuration dicts on this page.
agent = Agent(
    name="Assistant",
    llm={
        "model": "gpt-4o",
        "temperature": 0.7,
        "max_tokens": 4000,
        "timeout": 60,
        # Placeholder only; prefer supplying the key via the environment
        # (e.g. OPENAI_API_KEY) instead of hard-coding it.
        "api_key": "your-api-key"
    }
)

Provider-Specific Configuration

# OpenAI Configuration
# NOTE: "sk-..." and "org-..." are placeholders; do not commit real
# credentials. OPENAI_API_KEY is the documented environment variable for
# OpenAI models.
openai_config = {
    "model": "gpt-4o",
    "api_key": "sk-...",
    "organization": "org-...",
    "base_url": "https://api.openai.com/v1",
    "timeout": 60,  # presumably seconds, as in the timeout settings section
    "max_retries": 3,
    "temperature": 0.7,
    "max_tokens": 4000,
    "presence_penalty": 0.1,
    "frequency_penalty": 0.1
}

# Anthropic Configuration
# NOTE: "sk-ant-..." is a placeholder; ANTHROPIC_API_KEY is the documented
# environment variable for anthropic/* models.
anthropic_config = {
    "model": "claude-3-sonnet-20240229",
    "api_key": "sk-ant-...",
    "base_url": "https://api.anthropic.com",
    "timeout": 90,  # larger than the OpenAI example's 60
    "max_retries": 3,
    "temperature": 0.7,
    "max_tokens": 4000,
    "anthropic_version": "2023-06-01"
}

# Custom/Local LLM Configuration
# No "api_key" entry here: authentication is carried by an explicit
# Authorization header instead.
custom_config = {
    "model": "custom-model",
    "base_url": "http://localhost:8000",
    "timeout": 120,
    "headers": {
        "Authorization": "Bearer custom-token"  # placeholder token
    }
}

Retry Logic Configuration

Basic Retry Settings

# Basic retry policy: at most 3 retries with exponential backoff.
# With these values the waits would be 2s, 4s, 8s (each capped at 30s),
# assuming delay = retry_delay * retry_multiplier ** attempt — TODO confirm
# the exact formula used by the library.
retry_config = {
    "max_retries": 3,
    "retry_delay": 2.0,  # seconds
    "retry_multiplier": 2.0,  # exponential backoff multiplier
    "max_retry_delay": 30.0,  # maximum delay between retries
    "retry_on_status": [429, 500, 502, 503, 504],  # HTTP status codes
    # Error names are given as strings; presumably matched against the
    # exception class name — verify against the library.
    "retry_on_errors": [
        "RateLimitError",
        "APIConnectionError",
        "Timeout",
        "ServiceUnavailableError"
    ]
}

Advanced Retry Logic

# Advanced retry policy: a global backoff strategy, per-error overrides, and
# a circuit breaker section.
# NOTE: this dict uses "base_delay"/"max_delay", whereas the basic retry
# settings use "retry_delay"/"max_retry_delay" — the key names differ.
advanced_retry_config = {
    "retry_strategy": "exponential_backoff_with_jitter",
    "max_retries": 5,
    "base_delay": 1.0,
    "max_delay": 60.0,
    "jitter": 0.1,  # 10% randomization
    
    # Error-specific retry behavior
    # Each entry overrides the top-level values for that error name only.
    "error_retry_config": {
        "RateLimitError": {
            "max_retries": 10,
            "base_delay": 5.0,
            # presumably honors the server's Retry-After header — TODO confirm
            "respect_retry_after": True
        },
        "APIConnectionError": {
            "max_retries": 3,
            "base_delay": 2.0,
            "increase_timeout": True
        },
        "InsufficientQuotaError": {
            "max_retries": 0,  # Don't retry
            "fallback_model": "gpt-3.5-turbo"
        }
    },
    
    # Circuit breaker configuration (for custom integrations)
    # Note: Tool circuit breakers are automatic - see /features/tool-circuit-breaker
    # NOTE(review): units of recovery_timeout/timeout are not stated here;
    # presumably seconds — confirm.
    "circuit_breaker": {
        "failure_threshold": 5,
        "recovery_timeout": 60.0,
        "success_threshold": 2,
        "timeout": 30.0,
        "graceful_degradation": True
    }
}

Custom Retry Logic Implementation

def custom_retry_handler(error, attempt, config):
    """Return how long to wait before the next retry.

    Args:
        error: The exception raised by the failed request.
        attempt: Retry attempt number, used as the backoff exponent.
        config: Retry settings. Reads 'max_delay' and 'base_delay', plus
            'retry_multiplier' in the default branch. Mutated in place:
            'fallback_model' is set when the model is overloaded.

    Returns:
        The delay as a number, never greater than config['max_delay'].
    """
    max_delay = config['max_delay']

    if isinstance(error, RateLimitError):
        # Header values arrive as strings, and Retry-After may also be an
        # HTTP-date, so parse defensively instead of comparing str to float.
        raw_retry_after = error.response.headers.get('retry-after', 60)
        try:
            retry_after = float(raw_retry_after)
        except (TypeError, ValueError):
            retry_after = 60.0  # same default as when the header is absent
        # Clamp to [0, max_delay] so a bogus negative value cannot reach sleep().
        return min(max(retry_after, 0.0), max_delay)

    if isinstance(error, ModelOverloadedError):
        # Switch to a different model
        config['fallback_model'] = "gpt-3.5-turbo"
        # Capped like every other branch so the wait cannot grow unbounded.
        return min(config['base_delay'] * (2 ** attempt), max_delay)

    # Default exponential backoff
    return min(
        config['base_delay'] * (config['retry_multiplier'] ** attempt),
        max_delay
    )

# Registers the custom handler under "retry_handler".
# NOTE(review): the handler reads 'base_delay', 'max_delay' and
# 'retry_multiplier' from its config argument; none of them are set in this
# dict, so confirm which config object the library passes to the handler.
llm_config = {
    "retry_handler": custom_retry_handler,
    "max_retries": 5
}

Timeout Configuration

Timeout Settings

# Timeout settings at three levels of detail: one overall value, per-phase
# values, and a size-dependent ("dynamic") value.
# NOTE(review): which setting wins when several are present is not shown
# here — confirm against the library.
timeout_config = {
    # Basic timeout
    "timeout": 60,  # seconds
    
    # Detailed timeout configuration
    "timeout_config": {
        "connect": 5.0,  # Connection timeout
        "read": 60.0,    # Read timeout
        "write": 10.0,   # Write timeout
        "pool": 5.0      # Connection pool timeout
    },
    
    # Dynamic timeout based on request
    # Presumably timeout = base + per_token * tokens, clamped to [min, max]
    # — TODO confirm the formula.
    "dynamic_timeout": {
        "base": 30,
        "per_token": 0.01,  # Additional time per token
        "min": 10,
        "max": 300
    },
    
    # Timeout retry behavior
    "timeout_retry": {
        "increase_factor": 1.5,  # Increase timeout on retry
        "max_timeout": 300  # same ceiling as dynamic_timeout["max"]
    }
}

Request-Specific Timeouts

# Configure timeouts based on operation type
# Keys are operation names; each maps to a base "timeout" plus
# operation-specific scaling factors.
# NOTE(review): how each factor is combined with the base timeout (added or
# multiplied) is not shown here — confirm before relying on the numbers.
operation_timeouts = {
    "completion": {
        "timeout": 60,
        "dynamic": True,
        "factors": {
            "max_tokens": 0.01,
            "temperature": 1.2  # Higher temperature = more time
        }
    },
    "embedding": {
        "timeout": 30,
        "batch_factor": 0.1  # Per item in batch
    },
    "chat": {
        "timeout": 90,
        "message_factor": 5  # Per message in history
    }
}

Custom Headers Configuration

Basic Headers

# Static headers under the "headers" key, the same key used by the
# custom/local LLM configuration example.
headers_config = {
    "headers": {
        "Authorization": "Bearer your-api-key",  # placeholder credential
        "Content-Type": "application/json",
        "User-Agent": "PraisonAI/1.0",
        "X-Custom-Header": "custom-value"
    }
}

Dynamic Headers

import uuid

def generate_headers(request_type, model, **kwargs):
    """Build the header mapping for a single LLM request.

    Always emits User-Agent, X-Model, a fresh random X-Request-ID and
    X-Client-Version. A bearer Authorization header is appended when a truthy
    ``api_key`` keyword is supplied, followed by at most one provider header
    chosen by substring match on ``model`` ("anthropic" is checked before
    "openai").
    """
    result = {}
    result["User-Agent"] = f"PraisonAI/1.0 ({request_type})"
    result["X-Model"] = model
    result["X-Request-ID"] = str(uuid.uuid4())
    result["X-Client-Version"] = "1.0.0"

    # Authentication is optional: a missing or empty key adds nothing.
    token = kwargs.get('api_key')
    if token:
        result["Authorization"] = f"Bearer {token}"

    # (substring to look for in the model name, header name, header value);
    # only the first matching provider contributes a header.
    provider_headers = (
        ("anthropic", "anthropic-version", "2023-06-01"),
        ("openai", "OpenAI-Beta", "assistants=v1"),
    )
    for marker, header_name, header_value in provider_headers:
        if marker in model:
            result[header_name] = header_value
            break

    return result

# Pairs the per-request header generator with fixed headers.
# NOTE(review): how generated and static headers are merged (and which wins
# on a key collision) is not shown here — confirm against the library.
llm_config = {
    "headers_generator": generate_headers,
    "static_headers": {
        "X-Environment": "production"
    }
}

Provider-Specific Headers

import time

# OpenAI specific headers
# All values are placeholders to be replaced with real identifiers.
openai_headers = {
    "OpenAI-Organization": "org-xxx",
    "OpenAI-Beta": "assistants=v1",
    "X-Request-ID": "unique-request-id"
}

# Anthropic specific headers
anthropic_headers = {
    "anthropic-version": "2023-06-01",
    "X-Request-Source": "praisonai"
}

# Custom authentication headers
# NOTE: time.time() is evaluated once, when this dict is built, so
# "X-Timestamp" is fixed from then on; a per-request timestamp (and matching
# signature) would have to be produced at request time instead.
custom_auth_headers = {
    "X-API-Key": "your-api-key",
    "X-API-Secret": "your-secret",
    "X-Timestamp": str(int(time.time())),  # whole seconds since the epoch
    "X-Signature": "generated-signature"  # placeholder, not computed here
}

Advanced LLM Configuration

Load Balancing

# Spreads requests across several provider endpoints.
# NOTE(review): "weight" values (0.6 + 0.4 = 1.0) presumably matter only for
# the "weighted" strategy, not for "round_robin" as selected here — confirm.
load_balancing_config = {
    "strategy": "round_robin",  # or "least_latency", "weighted"
    "endpoints": [
        {
            "url": "https://api.openai.com/v1",
            "weight": 0.6,
            "models": ["gpt-4o", "gpt-3.5-turbo"]
        },
        {
            "url": "https://api.anthropic.com",
            "weight": 0.4,
            "models": ["claude-3-sonnet"]
        }
    ],
    # Periodic endpoint health probing; interval/timeout units are not
    # stated here (presumably seconds — TODO confirm).
    "health_check": {
        "enabled": True,
        "interval": 60,
        "timeout": 5,
        "failure_threshold": 3
    }
}

Model Fallback Configuration

# Ordered chain of substitute models to try when the primary model fails.
# Each entry names a model and the "condition" under which it is used.
# NOTE(review): the set of valid condition names and their exact triggers
# is not shown here — confirm against the library.
fallback_config = {
    "primary_model": "gpt-4o",
    "fallback_chain": [
        {
            "model": "gpt-4-turbo",
            "condition": "rate_limit",
            "max_attempts": 2
        },
        {
            "model": "gpt-3.5-turbo",
            "condition": "any_error",
            "temperature_adjustment": -0.2  # More deterministic
        },
        {
            # Different provider from the entries above.
            "model": "claude-3-sonnet",
            "condition": "repeated_failure",
            "provider_switch": True
        }
    ],
    "fallback_strategy": "progressive",  # or "immediate"
    "preserve_context": True
}

Request Optimization

# Request-level optimizations grouped into four independent sections:
# batching, streaming, caching and token optimization.
optimization_config = {
    # Request batching
    "batching": {
        "enabled": True,
        "max_batch_size": 10,
        "batch_timeout": 0.1,  # seconds
        "dynamic_batching": True
    },
    
    # Response streaming
    # NOTE(review): units of chunk_size/buffer_size are not stated here.
    "streaming": {
        "enabled": True,
        "chunk_size": 100,
        "buffer_size": 1000,
        "timeout_per_chunk": 30
    },
    
    # Caching
    "cache": {
        "enabled": True,
        "ttl": 3600,  # presumably seconds (one hour) — TODO confirm
        "max_size": 1000,
        "key_strategy": "semantic",  # or "exact"
        # presumably applies only to the "semantic" key strategy
        "similarity_threshold": 0.95
    },
    
    # Token optimization
    "token_optimization": {
        "compress_prompts": True,
        "remove_redundancy": True,
        "dynamic_max_tokens": True,
        "reserve_completion_tokens": 500
    }
}

Rate Limiting Configuration

# Client-side rate limiting: fixed limits, a limiting strategy, burst
# parameters and quota tracking.
rate_limit_config = {
    "rate_limits": {
        "requests_per_minute": 60,
        "tokens_per_minute": 90000,
        "requests_per_day": 10000
    },
    "rate_limit_strategy": "adaptive",  # or "fixed", "burst"
    # NOTE(review): presumably used only when the strategy is "burst", not
    # "adaptive" as selected above — confirm.
    "burst_config": {
        "burst_size": 10,
        "refill_rate": 1.0  # per second
    },
    "quota_management": {
        "track_usage": True,
        "warn_at_percentage": 80,
        "hard_limit_behavior": "queue"  # or "reject", "fallback"
    }
}

Complete Configuration Example

from praisonaiagents import Agent

# Comprehensive LLM configuration
# Python rejects a repeated keyword argument (SyntaxError), so the model name
# is given once, inside the single `llm` dict, using the same "model" key as
# the provider configuration dicts on this page.
agent = Agent(
    name="ProductionAgent",
    llm={
        # Model settings
        "model": "gpt-4o",
        "temperature": 0.7,
        "max_tokens": 4000,
        "top_p": 0.9,
        "presence_penalty": 0.1,
        "frequency_penalty": 0.1,

        # Timeout configuration
        "timeout": 60,
        "timeout_config": {
            "connect": 5,
            "read": 60,
            "dynamic": True
        },

        # Retry configuration
        "max_retries": 5,
        "retry_delay": 2.0,
        "retry_multiplier": 2.0,
        "retry_on_status": [429, 500, 502, 503],

        # Headers
        "headers": {
            "User-Agent": "PraisonAI/1.0",
            "X-Request-Source": "production"
        },

        # Advanced features
        "streaming": True,
        "cache_enabled": True,
        "fallback_models": ["gpt-3.5-turbo"],

        # Rate limiting
        "rate_limit_config": {
            "requests_per_minute": 60,
            "adaptive": True
        }
    }
)

Environment Variables

# Basic LLM settings
export OPENAI_API_KEY="sk-..."
# MODEL_NAME is the highest-priority model variable in the precedence table
# (OPENAI_MODEL_NAME is the legacy-compatible alternative).
export MODEL_NAME="gpt-4o"
export OPENAI_TEMPERATURE="0.7"

# Timeout settings
export PRAISONAI_LLM_TIMEOUT="60"
export PRAISONAI_LLM_CONNECT_TIMEOUT="5"
export PRAISONAI_LLM_READ_TIMEOUT="60"

# Retry settings
export PRAISONAI_LLM_MAX_RETRIES="3"
export PRAISONAI_LLM_RETRY_DELAY="2"
export PRAISONAI_LLM_RETRY_MULTIPLIER="2"

# Headers
# Custom headers are passed as a JSON object; single quotes keep the inner
# double quotes intact in the shell.
export PRAISONAI_LLM_USER_AGENT="PraisonAI/1.0"
export PRAISONAI_LLM_CUSTOM_HEADERS='{"X-Custom": "value"}'

# Advanced settings
export PRAISONAI_LLM_STREAMING="true"
export PRAISONAI_LLM_CACHE_ENABLED="true"
export PRAISONAI_LLM_RATE_LIMIT="60"

Monitoring and Debugging

# Observability settings in three sections: logging, metrics and debugging.
monitoring_config = {
    "logging": {
        "log_requests": True,
        "log_responses": True,
        "log_level": "INFO",
        # Keys to scrub from logs; presumably matched case-insensitively
        # against header/field names — TODO confirm.
        "sanitize_keys": ["api_key", "authorization"]
    },
    "metrics": {
        "track_latency": True,
        "track_tokens": True,
        "track_costs": True,
        "export_interval": 60  # units not stated here; presumably seconds
    },
    "debugging": {
        "capture_raw_responses": False,
        "validate_responses": True,
        "break_on_error": False
    }
}

See Also