> ## Documentation Index
> Fetch the complete documentation index at: https://docs.praison.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# LLM Configuration

> Complete reference for LLM configuration including retry logic, timeouts, and custom headers

# LLM Configuration

This page provides comprehensive documentation for configuring Large Language Models (LLMs) in PraisonAI, including retry mechanisms, timeout settings, custom headers, and advanced optimization options.

## Core LLM Configuration

### Basic Setup

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
from praisonaiagents import Agent

agent = Agent(
    name="Assistant",
    llm="gpt-4o",
    llm={
        "temperature": 0.7,
        "max_tokens": 4000,
        "timeout": 60,
        "api_key": "your-api-key"
    }
)
```

### Provider-Specific Configuration

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
# OpenAI Configuration
openai_config = {
    "model": "gpt-4o",
    "api_key": "sk-...",
    "organization": "org-...",
    "base_url": "https://api.openai.com/v1",
    "timeout": 60,
    "max_retries": 3,
    "temperature": 0.7,
    "max_tokens": 4000,
    "presence_penalty": 0.1,
    "frequency_penalty": 0.1
}

# Anthropic Configuration
anthropic_config = {
    "model": "claude-3-sonnet-20240229",
    "api_key": "sk-ant-...",
    "base_url": "https://api.anthropic.com",
    "timeout": 90,
    "max_retries": 3,
    "temperature": 0.7,
    "max_tokens": 4000,
    "anthropic_version": "2023-06-01"
}

# Custom/Local LLM Configuration
custom_config = {
    "model": "custom-model",
    "base_url": "http://localhost:8000",
    "timeout": 120,
    "headers": {
        "Authorization": "Bearer custom-token"
    }
}
```

## Retry Logic Configuration

### Basic Retry Settings

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
retry_config = {
    "max_retries": 3,
    "retry_delay": 2.0,  # seconds
    "retry_multiplier": 2.0,  # exponential backoff multiplier
    "max_retry_delay": 30.0,  # maximum delay between retries
    "retry_on_status": [429, 500, 502, 503, 504],  # HTTP status codes
    "retry_on_errors": [
        "RateLimitError",
        "APIConnectionError",
        "Timeout",
        "ServiceUnavailableError"
    ]
}
```

### Advanced Retry Logic

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
advanced_retry_config = {
    "retry_strategy": "exponential_backoff_with_jitter",
    "max_retries": 5,
    "base_delay": 1.0,
    "max_delay": 60.0,
    "jitter": 0.1,  # 10% randomization
    
    # Error-specific retry behavior
    "error_retry_config": {
        "RateLimitError": {
            "max_retries": 10,
            "base_delay": 5.0,
            "respect_retry_after": True
        },
        "APIConnectionError": {
            "max_retries": 3,
            "base_delay": 2.0,
            "increase_timeout": True
        },
        "InsufficientQuotaError": {
            "max_retries": 0,  # Don't retry
            "fallback_model": "gpt-3.5-turbo"
        }
    },
    
    # Circuit breaker configuration (for custom integrations)
    # Note: Tool circuit breakers are automatic - see /features/tool-circuit-breaker
    "circuit_breaker": {
        "failure_threshold": 5,
        "recovery_timeout": 60.0,
        "success_threshold": 2,
        "timeout": 30.0,
        "graceful_degradation": True
    }
}
```

### Custom Retry Logic Implementation

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
def custom_retry_handler(error, attempt, config):
    """Custom retry logic for specific scenarios"""
    if isinstance(error, RateLimitError):
        # Extract retry-after header if available
        retry_after = error.response.headers.get('retry-after', 60)
        return min(retry_after, config['max_delay'])
    
    elif isinstance(error, ModelOverloadedError):
        # Switch to a different model
        config['fallback_model'] = "gpt-3.5-turbo"
        return config['base_delay'] * (2 ** attempt)
    
    else:
        # Default exponential backoff
        return min(
            config['base_delay'] * (config['retry_multiplier'] ** attempt),
            config['max_delay']
        )

llm_config = {
    "retry_handler": custom_retry_handler,
    "max_retries": 5
}
```

## Timeout Configuration

### Timeout Settings

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
timeout_config = {
    # Basic timeout
    "timeout": 60,  # seconds
    
    # Detailed timeout configuration
    "timeout_config": {
        "connect": 5.0,  # Connection timeout
        "read": 60.0,    # Read timeout
        "write": 10.0,   # Write timeout
        "pool": 5.0      # Connection pool timeout
    },
    
    # Dynamic timeout based on request
    "dynamic_timeout": {
        "base": 30,
        "per_token": 0.01,  # Additional time per token
        "min": 10,
        "max": 300
    },
    
    # Timeout retry behavior
    "timeout_retry": {
        "increase_factor": 1.5,  # Increase timeout on retry
        "max_timeout": 300
    }
}
```

### Request-Specific Timeouts

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
# Configure timeouts based on operation type
operation_timeouts = {
    "completion": {
        "timeout": 60,
        "dynamic": True,
        "factors": {
            "max_tokens": 0.01,
            "temperature": 1.2  # Higher temperature = more time
        }
    },
    "embedding": {
        "timeout": 30,
        "batch_factor": 0.1  # Per item in batch
    },
    "chat": {
        "timeout": 90,
        "message_factor": 5  # Per message in history
    }
}
```

## Custom Headers Configuration

### Basic Headers

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
headers_config = {
    "headers": {
        "Authorization": "Bearer your-api-key",
        "Content-Type": "application/json",
        "User-Agent": "PraisonAI/1.0",
        "X-Custom-Header": "custom-value"
    }
}
```

### Dynamic Headers

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
import uuid

def generate_headers(request_type, model, **kwargs):
    """Generate headers dynamically based on request"""
    headers = {
        "User-Agent": f"PraisonAI/1.0 ({request_type})",
        "X-Model": model,
        "X-Request-ID": str(uuid.uuid4()),
        "X-Client-Version": "1.0.0"
    }
    
    # Add authentication
    if api_key := kwargs.get('api_key'):
        headers["Authorization"] = f"Bearer {api_key}"
    
    # Add custom headers for specific providers
    if "anthropic" in model:
        headers["anthropic-version"] = "2023-06-01"
    elif "openai" in model:
        headers["OpenAI-Beta"] = "assistants=v1"
    
    return headers

llm_config = {
    "headers_generator": generate_headers,
    "static_headers": {
        "X-Environment": "production"
    }
}
```

### Provider-Specific Headers

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
import time

# OpenAI specific headers
openai_headers = {
    "OpenAI-Organization": "org-xxx",
    "OpenAI-Beta": "assistants=v1",
    "X-Request-ID": "unique-request-id"
}

# Anthropic specific headers
anthropic_headers = {
    "anthropic-version": "2023-06-01",
    "X-Request-Source": "praisonai"
}

# Custom authentication headers
custom_auth_headers = {
    "X-API-Key": "your-api-key",
    "X-API-Secret": "your-secret",
    "X-Timestamp": str(int(time.time())),
    "X-Signature": "generated-signature"
}
```

## Advanced LLM Configuration

### Load Balancing

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
load_balancing_config = {
    "strategy": "round_robin",  # or "least_latency", "weighted"
    "endpoints": [
        {
            "url": "https://api.openai.com/v1",
            "weight": 0.6,
            "models": ["gpt-4o", "gpt-3.5-turbo"]
        },
        {
            "url": "https://api.anthropic.com",
            "weight": 0.4,
            "models": ["claude-3-sonnet"]
        }
    ],
    "health_check": {
        "enabled": True,
        "interval": 60,
        "timeout": 5,
        "failure_threshold": 3
    }
}
```

### Model Fallback Configuration

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
fallback_config = {
    "primary_model": "gpt-4o",
    "fallback_chain": [
        {
            "model": "gpt-4-turbo",
            "condition": "rate_limit",
            "max_attempts": 2
        },
        {
            "model": "gpt-3.5-turbo",
            "condition": "any_error",
            "temperature_adjustment": -0.2  # More deterministic
        },
        {
            "model": "claude-3-sonnet",
            "condition": "repeated_failure",
            "provider_switch": True
        }
    ],
    "fallback_strategy": "progressive",  # or "immediate"
    "preserve_context": True
}
```

### Request Optimization

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
optimization_config = {
    # Request batching
    "batching": {
        "enabled": True,
        "max_batch_size": 10,
        "batch_timeout": 0.1,  # seconds
        "dynamic_batching": True
    },
    
    # Response streaming
    "streaming": {
        "enabled": True,
        "chunk_size": 100,
        "buffer_size": 1000,
        "timeout_per_chunk": 30
    },
    
    # Caching
    "cache": {
        "enabled": True,
        "ttl": 3600,
        "max_size": 1000,
        "key_strategy": "semantic",  # or "exact"
        "similarity_threshold": 0.95
    },
    
    # Token optimization
    "token_optimization": {
        "compress_prompts": True,
        "remove_redundancy": True,
        "dynamic_max_tokens": True,
        "reserve_completion_tokens": 500
    }
}
```

## Rate Limiting Configuration

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
rate_limit_config = {
    "rate_limits": {
        "requests_per_minute": 60,
        "tokens_per_minute": 90000,
        "requests_per_day": 10000
    },
    "rate_limit_strategy": "adaptive",  # or "fixed", "burst"
    "burst_config": {
        "burst_size": 10,
        "refill_rate": 1.0  # per second
    },
    "quota_management": {
        "track_usage": True,
        "warn_at_percentage": 80,
        "hard_limit_behavior": "queue"  # or "reject", "fallback"
    }
}
```

## Complete Configuration Example

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
from praisonaiagents import Agent

# Comprehensive LLM configuration
agent = Agent(
    name="ProductionAgent",
    llm="gpt-4o",
    llm={
        # Model settings
        "temperature": 0.7,
        "max_tokens": 4000,
        "top_p": 0.9,
        "presence_penalty": 0.1,
        "frequency_penalty": 0.1,
        
        # Timeout configuration
        "timeout": 60,
        "timeout_config": {
            "connect": 5,
            "read": 60,
            "dynamic": True
        },
        
        # Retry configuration
        "max_retries": 5,
        "retry_delay": 2.0,
        "retry_multiplier": 2.0,
        "retry_on_status": [429, 500, 502, 503],
        
        # Headers
        "headers": {
            "User-Agent": "PraisonAI/1.0",
            "X-Request-Source": "production"
        },
        
        # Advanced features
        "streaming": True,
        "cache_enabled": True,
        "fallback_models": ["gpt-3.5-turbo"],
        
        # Rate limiting
        "rate_limit_config": {
            "requests_per_minute": 60,
            "adaptive": True
        }
    }
)
```

## Environment Variables

```bash theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
# Basic LLM settings
export OPENAI_API_KEY="sk-..."
export OPENAI_MODEL="gpt-4o"
export OPENAI_TEMPERATURE="0.7"

# Timeout settings
export PRAISONAI_LLM_TIMEOUT="60"
export PRAISONAI_LLM_CONNECT_TIMEOUT="5"
export PRAISONAI_LLM_READ_TIMEOUT="60"

# Retry settings
export PRAISONAI_LLM_MAX_RETRIES="3"
export PRAISONAI_LLM_RETRY_DELAY="2"
export PRAISONAI_LLM_RETRY_MULTIPLIER="2"

# Headers
export PRAISONAI_LLM_USER_AGENT="PraisonAI/1.0"
export PRAISONAI_LLM_CUSTOM_HEADERS='{"X-Custom": "value"}'

# Advanced settings
export PRAISONAI_LLM_STREAMING="true"
export PRAISONAI_LLM_CACHE_ENABLED="true"
export PRAISONAI_LLM_RATE_LIMIT="60"
```

## Monitoring and Debugging

```python theme={"theme":{"light":"vitesse-light","dark":"vitesse-dark"}}
monitoring_config = {
    "logging": {
        "log_requests": True,
        "log_responses": True,
        "log_level": "INFO",
        "sanitize_keys": ["api_key", "authorization"]
    },
    "metrics": {
        "track_latency": True,
        "track_tokens": True,
        "track_costs": True,
        "export_interval": 60
    },
    "debugging": {
        "capture_raw_responses": False,
        "validate_responses": True,
        "break_on_error": False
    }
}
```

## See Also

* [Model Configuration](/models) - Supported models and providers
* [Agent Configuration](/configuration/agent-config) - Agent-level LLM settings
* [Best Practices](/configuration/best-practices) - LLM configuration guidelines
