Optimize agent outputs using the evaluation system to measure, compare, and improve results.

Quick Start

1. Evaluate Agent Output

use praisonai::{Agent, AccuracyEvaluator};

// Create agent
let agent = Agent::new()
    .name("Writer")
    .instructions("Write concise summaries")
    .build()?;

// Get output
let output = agent.start("Summarize quantum computing").await?;

// Evaluate accuracy
let evaluator = AccuracyEvaluator::new()
    .input("Summarize quantum computing")
    .expected("Quantum computing uses qubits for parallel processing")
    .threshold(0.7)
    .build();

let result = evaluator.evaluate_simple(&output);
println!("Score: {} | Passed: {}", result.score.value, result.passed);

2. Criteria-Based Evaluation

use praisonai::CriteriaEvaluator;
use std::collections::HashMap;

let evaluator = CriteriaEvaluator::new()
    .criterion("accuracy")
    .criterion("clarity")
    .criterion("completeness")
    .threshold(0.7)
    .build();

// Score each criterion
let mut scores = HashMap::new();
scores.insert("accuracy".to_string(), 0.9);
scores.insert("clarity".to_string(), 0.8);
scores.insert("completeness".to_string(), 0.75);

let result = evaluator.evaluate(&scores);
println!("Overall: {} | Passed: {}", result.score.value, result.passed);

AccuracyEvaluator

Compare output against expected results.
pub struct AccuracyEvaluator {
    input: String,
    expected: String,
    config: EvaluatorConfig,
}

Builder Methods

| Method | Signature | Description |
|---|---|---|
| new() | fn new() -> AccuracyEvaluatorBuilder | Create builder |
| input(text) | fn input(impl Into&lt;String&gt;) -> Self | Set input |
| expected(text) | fn expected(impl Into&lt;String&gt;) -> Self | Set expected output |
| threshold(n) | fn threshold(f64) -> Self | Pass threshold (0.0-1.0) |
| build() | fn build(self) -> AccuracyEvaluator | Build evaluator |

Evaluation

let result = evaluator.evaluate_simple(&actual_output);
// result.score.value = 0.0-1.0
// result.passed = true/false

CriteriaEvaluator

Evaluate against custom criteria with weighted scores.
pub struct CriteriaEvaluator {
    criteria: Vec<String>,
    config: EvaluatorConfig,
}

Builder Methods

| Method | Signature | Description |
|---|---|---|
| new() | fn new() -> CriteriaEvaluatorBuilder | Create builder |
| criterion(name) | fn criterion(impl Into&lt;String&gt;) -> Self | Add criterion |
| threshold(n) | fn threshold(f64) -> Self | Pass threshold |
| build() | fn build(self) -> CriteriaEvaluator | Build evaluator |

Example

use praisonai::CriteriaEvaluator;
use std::collections::HashMap;

let evaluator = CriteriaEvaluator::new()
    .criterion("relevance")
    .criterion("coherence")
    .threshold(0.75)
    .build();

let mut scores = HashMap::new();
scores.insert("relevance".to_string(), 0.9);
scores.insert("coherence".to_string(), 0.8);

let result = evaluator.evaluate(&scores);
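
How the per-criterion scores are rolled up into result.score.value is not spelled out above, so here is a minimal pure-Rust sketch as a mental model, assuming the overall score is the plain (unweighted) mean of the criterion scores; the library's actual aggregation may weight criteria differently.

use std::collections::HashMap;

// Assumed aggregation: overall score = arithmetic mean of the criterion scores.
fn overall_score(scores: &HashMap<String, f64>) -> f64 {
    if scores.is_empty() {
        return 0.0;
    }
    scores.values().sum::<f64>() / scores.len() as f64
}

let mut scores = HashMap::new();
scores.insert("relevance".to_string(), 0.9);
scores.insert("coherence".to_string(), 0.8);

// With the 0.75 threshold used above, a mean of 0.85 passes.
let overall = overall_score(&scores);
println!("overall = {:.2}, passed = {}", overall, overall >= 0.75);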

PerformanceEvaluator

Measure execution performance.
pub struct PerformanceEvaluator {
    max_duration: Duration,
    max_ttft: Option<Duration>,
    config: EvaluatorConfig,
}

Configuration

| Option | Type | Default | Description |
|---|---|---|---|
| max_duration | Duration | 30s | Maximum allowed time |
| max_ttft | Option&lt;Duration&gt; | None | Max time-to-first-token |
| threshold | f64 | 0.7 | Pass threshold |

Example

use praisonai::{PerformanceEvaluator, PerformanceMetrics};
use std::time::Duration;

let evaluator = PerformanceEvaluator::new()
    .max_duration(Duration::from_secs(10))
    .threshold(0.8)
    .build();

let metrics = PerformanceMetrics::new(Duration::from_secs(5));
let result = evaluator.evaluate(&metrics);
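
In practice the duration passed to PerformanceMetrics::new usually comes from timing the actual agent call. A short sketch, assuming the same Agent and builder APIs shown earlier on this page (the prompt text is illustrative):

use praisonai::{Agent, PerformanceEvaluator, PerformanceMetrics};
use std::time::{Duration, Instant};

let agent = Agent::new()
    .name("Writer")
    .build()?;

let evaluator = PerformanceEvaluator::new()
    .max_duration(Duration::from_secs(10))
    .threshold(0.8)
    .build();

// Time the call and feed the elapsed duration into the evaluator
let started = Instant::now();
let _output = agent.start("Explain AI in one paragraph").await?;
let metrics = PerformanceMetrics::new(started.elapsed());

let result = evaluator.evaluate(&metrics);
println!("Within budget: {}", result.passed);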

Judge

LLM-based evaluation for complex judgments.
pub struct Judge {
    pub name: String,
    pub config: JudgeConfig,
    pub threshold: f64,
}

Configuration

| Option | Type | Default | Description |
|---|---|---|---|
| model | String | "gpt-4o-mini" | Model for judging |
| temperature | f64 | 0.0 | LLM temperature |
| system_prompt | Option&lt;String&gt; | None | Custom system prompt |

Example

use praisonai::Judge;

let judge = Judge::new("quality-judge")
    .with_threshold(0.8);

let result = judge.judge(
    "Explain quantum computing",
    &agent_output,
    Some("Expected explanation of qubits and superposition")
);

println!("Score: {} | Reason: {}", result.score, result.reasoning);

Optimization Loop Pattern

use praisonai::{Agent, AccuracyEvaluator};

let agent = Agent::new()
    .name("Writer")
    .build()?;

let evaluator = AccuracyEvaluator::new()
    .expected("Clear, concise explanation")
    .threshold(0.8)
    .build();

let mut output = agent.start("Explain AI").await?;
let mut result = evaluator.evaluate_simple(&output);

// Iterate until passing, with a cap so an output that never passes cannot loop forever
let mut attempts = 0;
while !result.passed && attempts < 5 {
    let feedback = format!(
        "Previous score: {}. Improve clarity and accuracy.",
        result.score.value
    );
    output = agent.start(&feedback).await?;
    result = evaluator.evaluate_simple(&output);
    attempts += 1;
}

println!("Final output (score {}): {}", result.score.value, output);

Best Practices

Use specific, measurable criteria so evaluations stay consistent across runs.
Start with a threshold of 0.7-0.8 and adjust it based on your use case.
Combine AccuracyEvaluator and PerformanceEvaluator for a complete picture, as sketched below.
Track scores across iterations to identify improvement patterns.
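
A hedged sketch of that combined check, reusing only the builders and calls shown earlier on this page (the prompt and expected text are illustrative): one agent run is scored on both quality and latency.

use praisonai::{Agent, AccuracyEvaluator, PerformanceEvaluator, PerformanceMetrics};
use std::time::{Duration, Instant};

let agent = Agent::new()
    .name("Writer")
    .instructions("Write concise summaries")
    .build()?;

let accuracy = AccuracyEvaluator::new()
    .input("Summarize quantum computing")
    .expected("Quantum computing uses qubits for parallel processing")
    .threshold(0.7)
    .build();

let performance = PerformanceEvaluator::new()
    .max_duration(Duration::from_secs(10))
    .threshold(0.8)
    .build();

// One agent run, evaluated on both accuracy and speed
let started = Instant::now();
let output = agent.start("Summarize quantum computing").await?;
let elapsed = started.elapsed();

let accuracy_result = accuracy.evaluate_simple(&output);
let perf_result = performance.evaluate(&PerformanceMetrics::new(elapsed));

println!(
    "accuracy: {} (passed: {}) | performance passed: {}",
    accuracy_result.score.value, accuracy_result.passed, perf_result.passed
);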