# Profiling
PraisonAI includes a powerful profiling module for measuring and analyzing agent performance. Profile function execution, API calls, streaming latency, memory usage, and more.

Quick Start

from praisonai.profiler import Profiler, profile

# Enable profiling
Profiler.enable()

# Profile a function
@profile
def my_agent_task():
    # Your agent code here
    pass

# Profile a block of code
with Profiler.block("agent_initialization"):
    agent = Agent(instructions="You are helpful")

# Get report
Profiler.report()

Features

Function Profiling

Measure execution time of any function with decorators

API Call Profiling

Track wall-clock time for HTTP/API calls

Streaming Profiling

Measure Time To First Token (TTFT) and total streaming time

Memory Profiling

Track memory usage with tracemalloc integration

Statistics

Get p50, p95, p99 percentiles and statistical analysis

Export

Export reports as JSON, HTML, or SVG flamegraphs

Environment Variable

Enable profiling globally via environment variable:
export PRAISONAI_PROFILE=1

Function Profiling

Basic Decorator

from praisonai.profiler import profile, profile_async

@profile
def sync_function():
    """Automatically profiled when enabled."""
    return "result"

@profile_async
async def async_function():
    """Profile async functions."""
    return await some_async_call()

# With custom category
@profile(category="llm_call")
def call_llm():
    pass

Block Profiling

from praisonai.profiler import Profiler

with Profiler.block("data_processing"):
    # Code to profile
    process_data()

with Profiler.block("model_inference", category="inference"):
    result = model.predict(data)

API Call Profiling

Track HTTP/API call latency with wall-clock time:
from praisonai.profiler import Profiler, profile_api

# Using decorator
@profile_api(endpoint="openai/chat/completions")
def call_openai():
    response = client.chat.completions.create(...)
    return response

# Using context manager
with Profiler.api_call("https://api.openai.com/v1/chat/completions", method="POST") as call:
    response = requests.post(url, json=data)
    call['status_code'] = response.status_code
    call['response_size'] = len(response.content)

# Get all API calls
api_calls = Profiler.get_api_calls()
for call in api_calls:
    print(f"{call.endpoint}: {call.duration_ms:.2f}ms")

Streaming Profiling

Measure Time To First Token (TTFT) and streaming performance:
from praisonai.profiler import Profiler, StreamingTracker

# Using context manager
with Profiler.streaming("chat_completion") as tracker:
    for chunk in stream:
        if tracker._first_token_time is None:
            tracker.first_token()  # Mark TTFT
        tracker.chunk()
        process(chunk)

# Manual tracking
tracker = StreamingTracker("my_stream")
tracker.start()

for i, chunk in enumerate(response_stream):
    if i == 0:
        tracker.first_token()
    tracker.chunk()
    
tracker.end(total_tokens=150)

# Get streaming records
streams = Profiler.get_streaming_records()
for s in streams:
    print(f"TTFT: {s.ttft_ms:.2f}ms, Total: {s.total_ms:.2f}ms, Chunks: {s.chunk_count}")

Memory Profiling

Track memory usage with tracemalloc:
from praisonai.profiler import Profiler

# Profile memory for a block
with Profiler.memory("agent_creation"):
    agent = Agent(instructions="...", tools=[...])

# Get memory records
memories = Profiler.get_memory_records()
for m in memories:
    print(f"{m.name}: current={m.current_kb:.1f}KB, peak={m.peak_kb:.1f}KB")

# Take a snapshot
snapshot = Profiler.memory_snapshot()
print(f"Current: {snapshot['current_kb']:.1f}KB")
print(f"Peak: {snapshot['peak_kb']:.1f}KB")

Statistics

Get statistical analysis of profiling data:
from praisonai.profiler import Profiler

# Get overall statistics
stats = Profiler.get_statistics()
print(f"P50 (Median): {stats['p50']:.2f}ms")
print(f"P95: {stats['p95']:.2f}ms")
print(f"P99: {stats['p99']:.2f}ms")
print(f"Mean: {stats['mean']:.2f}ms")
print(f"Std Dev: {stats['std_dev']:.2f}ms")

# Get statistics for specific category
api_stats = Profiler.get_statistics(category="api")
llm_stats = Profiler.get_statistics(category="llm_call")

cProfile Integration

For detailed function-level profiling:
from praisonai.profiler import Profiler, profile_detailed

# Using decorator
@profile_detailed
def heavy_computation():
    return sum(i * i for i in range(100000))

# Using context manager
with Profiler.cprofile("agent_run") as stats:
    result = agent.run()

# Get cProfile stats
cprofile_data = Profiler.get_cprofile_stats()
for entry in cprofile_data:
    print(f"Operation: {entry['name']}")
    print(entry['stats'])

Line-Level Profiling

Profile individual lines (requires line_profiler package):
from praisonai.profiler import profile_lines

@profile_lines
def detailed_function():
    a = expensive_operation_1()  # Line timing
    b = expensive_operation_2()  # Line timing
    return a + b

# Get line profile data
line_data = Profiler.get_line_profile_data()
Install `line_profiler` for full functionality: `pip install line_profiler`

Reports and Export

Console Report

from praisonai.profiler import Profiler

# Print to console
Profiler.report()

# Get as string
report_text = Profiler.report(output="string")

JSON Export

# Export as JSON string
json_report = Profiler.export_json()

# Save to file
Profiler.export_to_file("profile_report.json", format="json")

HTML Export

# Export as HTML string
html_report = Profiler.export_html()

# Save to file
Profiler.export_to_file("profile_report.html", format="html")

Flamegraph

# Export flamegraph as SVG
Profiler.export_flamegraph("profile.svg")
For production flamegraphs, use py-spy:
py-spy record -o profile.svg -- python your_script.py

Import Profiling

Profile module import times:
from praisonai.profiler import profile_imports, time_import

# Profile imports in a block
with profile_imports() as profiler:
    import pandas
    import numpy
    from praisonaiagents import Agent

# Get slowest imports
slowest = profiler.get_slowest(n=5)
for imp in slowest:
    print(f"{imp.module}: {imp.duration_ms:.2f}ms")

# Quick single import timing
duration = time_import("torch")
print(f"torch import: {duration:.2f}ms")

Zero Performance Impact

When profiling is disabled, there is zero performance overhead:
from praisonai.profiler import Profiler, profile

Profiler.disable()  # Profiling off

@profile
def fast_function():
    return 1 + 1

# No overhead - decorator is a no-op when disabled
for _ in range(1000000):
    fast_function()  # Full speed

Complete Example

from praisonai.profiler import Profiler, profile, profile_api
from praisonaiagents import Agent

# Enable profiling
Profiler.enable()

@profile_api(endpoint="openai/chat")
def create_completion(prompt):
    return client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )

@profile(category="agent")
def run_agent(task):
    with Profiler.memory("agent_init"):
        agent = Agent(instructions="You are helpful")
    
    with Profiler.block("agent_execution"):
        result = agent.chat(task)
    
    return result

# Run with profiling
result = run_agent("Explain quantum computing")

# Get comprehensive report
print("\n=== Profiling Report ===")
Profiler.report()

# Get statistics
stats = Profiler.get_statistics()
print(f"\nP95 Latency: {stats['p95']:.2f}ms")

# Export detailed report
Profiler.export_to_file("agent_profile.html", format="html")
Profiler.export_flamegraph("agent_flamegraph.svg")

# Cleanup
Profiler.disable()
Profiler.clear()

API Reference

Profiler Class Methods

| Method | Description |
| --- | --- |
| `enable()` | Enable profiling |
| `disable()` | Disable profiling |
| `clear()` | Clear all profiling data |
| `is_enabled()` | Check if profiling is enabled |
| `record_timing()` | Record a timing measurement |
| `record_api_call()` | Record an API call |
| `record_streaming()` | Record streaming metrics |
| `record_memory()` | Record memory usage |
| `block()` | Context manager for block profiling |
| `api_call()` | Context manager for API call profiling |
| `streaming()` | Context manager for streaming profiling |
| `memory()` | Context manager for memory profiling |
| `cprofile()` | Context manager for cProfile |
| `get_statistics()` | Get statistical analysis |
| `get_summary()` | Get profiling summary |
| `report()` | Generate console report |
| `export_json()` | Export as JSON |
| `export_html()` | Export as HTML |
| `export_flamegraph()` | Export as SVG flamegraph |

Decorators

| Decorator | Description |
| --- | --- |
| `@profile` | Profile sync function |
| `@profile_async` | Profile async function |
| `@profile_api` | Profile as API call |
| `@profile_api_async` | Profile async API call |
| `@profile_detailed` | Profile with cProfile |
| `@profile_lines` | Line-level profiling |

Data Classes

| Class | Fields |
| --- | --- |
| `TimingRecord` | name, duration_ms, category, file, line |
| `APICallRecord` | endpoint, method, duration_ms, status_code |
| `StreamingRecord` | name, ttft_ms, total_ms, chunk_count, total_tokens |
| `MemoryRecord` | name, current_kb, peak_kb |