Platform/DevOps Persona
Role : Deploy, scale, secure, and monitor PraisonAI recipe infrastructure for production workloads.
Primary Goals
Deploy reliably with zero-downtime updates
Scale horizontally to handle load spikes
Secure endpoints with authentication and rate limiting
Monitor health with metrics, logs, and alerts
Recommended Integration Models
Typical Workflow
Set Up Infrastructure
# docker-compose.yml
version : '3.8'
services :
praisonai-runner :
image : praisonai/runner:latest
ports :
- "8765:8765"
environment :
- OPENAI_API_KEY=${OPENAI_API_KEY}
- PRAISONAI_API_KEY=${PRAISONAI_API_KEY}
- PRAISONAI_AUTH=api-key
volumes :
- ./recipes:/root/.praison/templates
healthcheck :
test : [ "CMD" , "curl" , "-f" , "http://localhost:8765/health" ]
interval : 30s
timeout : 10s
retries : 3
deploy :
replicas : 3
resources :
limits :
memory : 2G
cpus : '2'
Configure Authentication
# serve.yaml
host : 0.0.0.0
port : 8765
auth : api-key
api_key : ${PRAISONAI_API_KEY}
# Rate limiting
rate_limit : 100 # requests per minute per client
# Request limits
max_request_size : 10485760 # 10MB
# CORS (if needed)
cors_origins : "https://app.example.com"
Set Up Load Balancer
# nginx.conf
upstream praisonai {
least_conn ;
server runner1:8765 weight = 1 ;
server runner2:8765 weight = 1 ;
server runner3:8765 weight = 1 ;
keepalive 32 ;
}
server {
listen 443 ssl http2;
server_name api.example.com;
ssl_certificate /etc/ssl/certs/cert.pem;
ssl_certificate_key /etc/ssl/private/key.pem;
# Security headers
add_header X-Content-Type-Options nosniff;
add_header X-Frame-Options DENY;
add_header X-XSS-Protection "1; mode=block" ;
location / {
proxy_pass http://praisonai;
proxy_http_version 1.1 ;
proxy_set_header Connection "" ;
proxy_set_header Host $ host ;
proxy_set_header X-Real-IP $ remote_addr ;
proxy_set_header X-Forwarded-For $ proxy_add_x_forwarded_for ;
proxy_read_timeout 300s ;
proxy_send_timeout 300s ;
}
# SSE streaming
location /v1/recipes/stream {
proxy_pass http://praisonai;
proxy_http_version 1.1 ;
proxy_set_header Connection "" ;
proxy_buffering off ;
proxy_cache off ;
chunked_transfer_encoding off ;
}
}
Configure Monitoring
# prometheus.yml
global :
scrape_interval : 15s
scrape_configs :
- job_name : 'praisonai'
static_configs :
- targets : [ 'runner1:8765' , 'runner2:8765' , 'runner3:8765' ]
metrics_path : /metrics
# alertmanager rules
groups :
- name : praisonai
rules :
- alert : HighErrorRate
expr : rate(praisonai_recipe_errors_total[5m]) > 0.1
for : 5m
labels :
severity : warning
annotations :
summary : "High recipe error rate"
- alert : SlowRecipes
expr : histogram_quantile(0.95, praisonai_recipe_duration_seconds) > 30
for : 10m
labels :
severity : warning
annotations :
summary : "Recipe execution slow"
Set Up Logging
# fluent-bit.conf
[ INPUT ]
Name tail
Path /var/log/praisonai/*.log
Parser json
Tag praisonai.*
[ FILTER ]
Name modify
Match praisonai.*
Add service praisonai
Add environment production
[ OUTPUT ]
Name elasticsearch
Match praisonai.*
Host elasticsearch
Port 9200
Index praisonai-logs
Key Concerns
High Availability
# Kubernetes deployment with anti-affinity
apiVersion : apps/v1
kind : Deployment
metadata :
name : praisonai-runner
spec :
replicas : 3
selector :
matchLabels :
app : praisonai-runner
template :
metadata :
labels :
app : praisonai-runner
spec :
affinity :
podAntiAffinity :
preferredDuringSchedulingIgnoredDuringExecution :
- weight : 100
podAffinityTerm :
labelSelector :
matchLabels :
app : praisonai-runner
topologyKey : kubernetes.io/hostname
containers :
- name : runner
image : praisonai/runner:latest
ports :
- containerPort : 8765
livenessProbe :
httpGet :
path : /health
port : 8765
initialDelaySeconds : 10
periodSeconds : 10
readinessProbe :
httpGet :
path : /health
port : 8765
initialDelaySeconds : 5
periodSeconds : 5
Security Hardening
# serve.yaml - Production security
host : 0.0.0.0
port : 8765
# Authentication
auth : api-key
api_key : ${PRAISONAI_API_KEY}
# Rate limiting
rate_limit : 100
rate_limit_burst : 20
# Request limits
max_request_size : 10485760
request_timeout : 300
# Disable admin endpoints in production
enable_admin : false
enable_metrics : true
# Logging
log_level : info
log_format : json
Scaling Strategy
# Kubernetes HPA
apiVersion : autoscaling/v2
kind : HorizontalPodAutoscaler
metadata :
name : praisonai-runner-hpa
spec :
scaleTargetRef :
apiVersion : apps/v1
kind : Deployment
name : praisonai-runner
minReplicas : 2
maxReplicas : 10
metrics :
- type : Resource
resource :
name : cpu
target :
type : Utilization
averageUtilization : 70
- type : Resource
resource :
name : memory
target :
type : Utilization
averageUtilization : 80
Operational Runbooks
Health Check
# Check all runners
for host in runner1 runner2 runner3 ; do
echo "Checking $host ..."
curl -s "http:// $host :8765/health" | jq .
done
# Using CLI
praisonai endpoints health --url http://runner1:8765
Rolling Restart
# Kubernetes
kubectl rollout restart deployment/praisonai-runner
# Docker Compose
docker-compose up -d --no-deps --build praisonai-runner
Log Analysis
# Find errors in last hour
kubectl logs -l app=praisonai-runner --since=1h | grep -i error
# Recipe execution times
kubectl logs -l app=praisonai-runner | \
jq -r 'select(.event=="recipe_completed") | "\(.recipe): \(.duration_ms)ms"'
Troubleshooting
Check for memory leaks in long-running recipes
Implement request size limits
Add memory limits to containers
Monitor with kubectl top pods
Increase proxy timeouts for long recipes
Check network policies
Verify health checks are passing
Review load balancer configuration
Adjust rate limits based on traffic patterns
Implement client-specific limits
Add burst allowance for spikes
Monitor rate limit metrics
Verify certificate chain is complete
Check certificate expiration
Ensure correct SNI configuration
Test with openssl s_client
Security Checklist
Monitoring Dashboard
Key metrics to display:
Metric Description Alert Threshold praisonai_recipe_totalTotal recipe invocations - praisonai_recipe_errors_totalError count > 5% of total praisonai_recipe_duration_secondsExecution time p95 > 30s praisonai_active_sessionsConcurrent sessions > 80% capacity praisonai_rate_limit_exceededRate limit hits > 10/min
Next Steps