Advanced retrieval patterns with quality scoring, reranking, and confidence-based filtering
from praisonaiagents import Agent, Memory

# Create an agent with quality-based memory
agent = Agent(
    name="Quality RAG Agent",
    memory=Memory(
        provider="mem0",
        config={
            "llm": {
                "provider": "openai",
                "model": "gpt-4o-mini"
            },
            "quality_threshold": 0.7,   # Minimum quality score for storage
            "auto_quality_check": True  # Enable automatic quality assessment
        }
    )
)
# Quality metrics are automatically calculated for stored content
# - Completeness: How complete is the information?
# - Relevance: How relevant to the context?
# - Clarity: How clear and understandable?
# - Accuracy: How accurate and factual?
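How those metrics get computed is an implementation detail, but a minimal sketch of an LLM-based evaluator looks like the following; the evaluate_quality_metrics helper and its prompt are illustrative assumptions, not part of the PraisonAI API.

import json
from openai import OpenAI  # any chat-completion client works; OpenAI SDK assumed here

client = OpenAI()

def evaluate_quality_metrics(content: str) -> dict:
    # Hypothetical helper: ask an LLM to score content on the four axes (0.0-1.0)
    prompt = (
        "Rate the following text from 0.0 to 1.0 on completeness, relevance, "
        "clarity, and accuracy. Respond with a JSON object containing exactly "
        f"those four keys.\n\nText:\n{content}"
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
    )
    return json.loads(response.choices[0].message.content)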
from praisonaiagents import Memory

class QualityFilteredMemory(Memory):
    def add(self, content, metadata=None):
        # Score the incoming content before storing it
        quality_score = self._calculate_quality_score(content)

        # Only store content that meets the configured threshold
        if quality_score >= self.config.get("quality_threshold", 0.7):
            super().add(content, metadata)
            return True
        print(f"Content rejected: quality score {quality_score:.2f}")
        return False

    def _calculate_quality_score(self, content):
        # Uses an LLM to evaluate the individual quality metrics;
        # _evaluate_quality_metrics is assumed to return a dict such as
        # {"completeness": 0.8, "relevance": 0.9, ...}
        metrics = self._evaluate_quality_metrics(content)

        # Apply configured weights when present, otherwise a plain average
        weights = self.config.get("metrics_weights")
        if weights:
            return sum(metrics[m] * w for m, w in weights.items())
        return sum(metrics.values()) / len(metrics)
# Use the quality-filtered memory
memory = QualityFilteredMemory(
    provider="mem0",
    config={
        "quality_threshold": 0.8,
        "metrics_weights": {
            "completeness": 0.3,
            "relevance": 0.3,
            "clarity": 0.2,
            "accuracy": 0.2
        }
    }
)
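A quick usage sketch; the example strings and the printed score are illustrative:

# Detailed, self-contained content should clear the 0.8 threshold
memory.add(
    "PraisonAI agents support mem0-backed long-term memory with configurable "
    "quality thresholds and automatic quality checks.",
    metadata={"topic": "memory"}
)

# Vague, low-information content is likely to be rejected
memory.add("stuff happened")  # e.g. prints: Content rejected: quality score 0.35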
from praisonaiagents import Knowledge

# Enable reranking in the knowledge base
knowledge = Knowledge(
    data="path/to/documents",
    config={
        "reranker": {
            "enabled": True,
            "model": "BAAI/bge-reranker-v2-m3",  # Or any cross-encoder model
            "top_k": 5,             # Return the top 5 results after reranking
            "score_threshold": 0.7
        }
    }
)
# Search with reranking
results = knowledge.search(
    query="What are the key features of the product?",
    rerank=True,
    rerank_top_k=10  # Rerank the top 10 initial results
)

# Results are ordered by reranking score
for result in results:
    print(f"Score: {result['rerank_score']:.3f} - {result['text'][:100]}...")
from praisonaiagents import Agent, Knowledge

class MultiStageRAGAgent(Agent):
    def __init__(self, name, knowledge_base):
        super().__init__(name=name)
        self.knowledge = knowledge_base

    def retrieve_with_quality(self, query):
        # Stage 1: broad initial retrieval
        initial_results = self.knowledge.search(
            query=query,
            top_k=50,                 # Gather a large candidate pool
            similarity_threshold=0.6  # Lower threshold so little is missed
        )

        # Stage 2: quality filtering
        quality_filtered = [
            r for r in initial_results
            if r.get("quality_score", 0) > 0.7
        ]

        # Stage 3: rerank only when there are more candidates than needed
        if len(quality_filtered) > 5:
            return self.knowledge.rerank(
                query=query,
                documents=quality_filtered,
                top_k=5
            )
        return quality_filtered[:5]
# Use multi-stage retrieval
agent = MultiStageRAGAgent(
    name="Advanced RAG",
    knowledge_base=knowledge
)
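Calling the pipeline then looks like this, assuming results are dicts with a text field as in the earlier examples:

results = agent.retrieve_with_quality("What are the key features of the product?")
for r in results:
    print(r.get("text", "")[:100])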
from praisonaiagents import Knowledge
import numpy as np

class ConfidenceRAG:
    def __init__(self, knowledge_base):
        self.kb = knowledge_base

    def search_with_confidence(self, query, min_confidence=0.8):
        # Get results together with their similarity scores
        results = self.kb.search(query, return_scores=True)
        if not results:
            return []

        # Characterise the score distribution
        scores = [r["score"] for r in results]
        mean_score = np.mean(scores)
        std_score = np.std(scores) or 1.0  # Guard against zero variance

        # Keep only results whose confidence clears the threshold
        confident_results = []
        for result in results:
            # Map the z-score through a sigmoid to get a 0-1 confidence
            z_score = (result["score"] - mean_score) / std_score
            confidence = 1 / (1 + np.exp(-z_score))
            if confidence >= min_confidence:
                result["confidence"] = confidence
                confident_results.append(result)
        return confident_results
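Usage is a one-liner; only results scoring well above the mean of their own batch survive (with min_confidence=0.8, a result needs a z-score of roughly 1.4 or higher):

rag = ConfidenceRAG(knowledge)
for r in rag.search_with_confidence("machine learning algorithms"):
    print(f"confidence={r['confidence']:.2f} score={r['score']:.3f}")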
from praisonaiagents import Knowledge

# Configure hybrid search
knowledge = Knowledge(
    data="documents/",
    config={
        "hybrid_search": {
            "enabled": True,
            "semantic_weight": 0.7,
            "keyword_weight": 0.3,
            "quality_weight": 0.2  # Additional quality component
        }
    }
)
# Perform a hybrid search
results = knowledge.search(
    query="machine learning algorithms",
    keyword_search=True,   # Enable the keyword component
    semantic_search=True,  # Enable the semantic component
    quality_filter=True,   # Apply quality filtering
    top_k=10
)

# Results are scored by a weighted combination of the components
for result in results:
    print(f"Hybrid Score: {result['hybrid_score']:.3f}")
    print(f"  - Semantic: {result['semantic_score']:.3f}")
    print(f"  - Keyword:  {result['keyword_score']:.3f}")
    print(f"  - Quality:  {result['quality_score']:.3f}")
from praisonaiagents import Memory
from datetime import datetime

class MonitoredMemory(Memory):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.quality_history = []

    def add_with_tracking(self, content, metadata=None):
        # Calculate per-metric quality scores
        metrics = self._evaluate_quality_metrics(content)

        # Record the metrics for later reporting
        self.quality_history.append({
            "timestamp": datetime.now().isoformat(),
            "metrics": metrics,
            "content_length": len(content),
            "metadata": metadata
        })

        # Store only if the average quality is sufficient
        if sum(metrics.values()) / len(metrics) >= 0.7:
            return self.add(content, metadata)
        return False

    def get_quality_report(self):
        if not self.quality_history:
            return "No quality data available"

        # Average each metric across the recorded history
        avg_metrics = {
            "completeness": 0,
            "relevance": 0,
            "clarity": 0,
            "accuracy": 0
        }
        for entry in self.quality_history:
            for metric, value in entry["metrics"].items():
                avg_metrics[metric] += value
        for metric in avg_metrics:
            avg_metrics[metric] /= len(self.quality_history)

        return {
            "total_entries": len(self.quality_history),
            "average_metrics": avg_metrics,
            "quality_trend": self._calculate_trend()
        }

    def _calculate_trend(self):
        # Minimal trend estimate: average quality of the newer half of the
        # history minus that of the older half (positive = improving)
        if len(self.quality_history) < 2:
            return 0.0

        def avg(entries):
            return sum(
                sum(e["metrics"].values()) / len(e["metrics"]) for e in entries
            ) / len(entries)

        half = len(self.quality_history) // 2
        return avg(self.quality_history[half:]) - avg(self.quality_history[:half])
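As with the earlier classes, this assumes an _evaluate_quality_metrics helper is available (for instance, the LLM-based evaluator sketched near the top of this section). Usage then reduces to:

memory = MonitoredMemory(provider="mem0")
memory.add_with_tracking(
    "The reranker model is BAAI/bge-reranker-v2-m3.",
    metadata={"source": "docs"}
)
print(memory.get_quality_report())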
from praisonaiagents import Agent, Memory

class AdaptiveQualityAgent(Agent):
    def __init__(self, name):
        super().__init__(
            name=name,
            memory=Memory(provider="mem0")
        )
        self.quality_thresholds = {
            "critical": 0.9,   # Critical information
            "important": 0.8,  # Important facts
            "general": 0.7,    # General knowledge
            "casual": 0.6      # Casual conversation
        }

    def store_with_context(self, content, context_type="general"):
        # Pick the threshold appropriate to this kind of content
        threshold = self.quality_thresholds.get(context_type, 0.7)

        # Temporarily adjust the memory's quality threshold
        original_threshold = self.memory.config.get("quality_threshold", 0.7)
        self.memory.config["quality_threshold"] = threshold
        try:
            return self.memory.add(
                content,
                metadata={"context_type": context_type}
            )
        finally:
            # Restore the original threshold even if add() raises
            self.memory.config["quality_threshold"] = original_threshold
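A usage sketch; the example contents are illustrative:

agent = AdaptiveQualityAgent("Adaptive Agent")

# Critical facts must clear the 0.9 bar; casual chatter only needs 0.6
agent.store_with_context(
    "Production API keys rotate every 90 days.", context_type="critical"
)
agent.store_with_context(
    "The user mentioned they like coffee.", context_type="casual"
)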
from praisonaiagents import Knowledge

class QueryExpansionRAG:
    def __init__(self, knowledge_base, llm):
        self.kb = knowledge_base
        self.llm = llm

    def search_with_expansion(self, query):
        # Ask the LLM for alternative phrasings of the query
        prompt = (f"Generate 3 alternative phrasings for this query:\n"
                  f"Query: {query}\nAlternatives:")
        variations = self.llm.generate(prompt)
        queries = [query] + self._parse_variations(variations)

        # Search with the original query plus every variation
        all_results = []
        for q in queries:
            all_results.extend(self.kb.search(q, top_k=5))

        # Deduplicate the merged pool, then rerank by quality
        return self._rerank_by_quality(self._deduplicate(all_results), query)

    def _parse_variations(self, text):
        # One variation per non-empty line; strip list markers like "-" or "1."
        return [line.lstrip("-*0123456789.) ").strip()
                for line in text.splitlines() if line.strip()]

    def _deduplicate(self, results):
        # Keep the first occurrence of each distinct text
        seen, unique = set(), []
        for r in results:
            if r["text"] not in seen:
                seen.add(r["text"])
                unique.append(r)
        return unique

    def _rerank_by_quality(self, results, query):
        # Highest quality first; query is kept so a cross-encoder could use it
        return sorted(results, key=lambda r: r.get("quality_score", 0), reverse=True)
# For factual/critical information
memory_config = {
    "quality_threshold": 0.85,
    "metrics_weights": {
        "accuracy": 0.4,      # Prioritise accuracy
        "completeness": 0.3,
        "relevance": 0.2,
        "clarity": 0.1
    }
}

# For creative/general content
memory_config = {
    "quality_threshold": 0.7,
    "metrics_weights": {
        "relevance": 0.4,     # Prioritise relevance
        "clarity": 0.3,
        "completeness": 0.2,
        "accuracy": 0.1
    }
}
from datetime import datetime

# Track retrieval metrics over time
class RAGMonitor:
    def __init__(self):
        self.queries = []

    def track_query(self, query, results, user_feedback=None):
        # Guard against empty result sets when averaging quality
        avg_quality = (
            sum(r.get("quality_score", 0) for r in results) / len(results)
            if results else 0.0
        )
        self.queries.append({
            "query": query,
            "timestamp": datetime.now(),
            "result_count": len(results),
            "avg_quality": avg_quality,
            "user_feedback": user_feedback
        })

    def get_performance_metrics(self):
        if not self.queries:
            return {"total_queries": 0, "avg_results": 0.0, "avg_quality": 0.0}
        return {
            "total_queries": len(self.queries),
            "avg_results": sum(q["result_count"] for q in self.queries) / len(self.queries),
            "avg_quality": sum(q["avg_quality"] for q in self.queries) / len(self.queries)
        }
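Wiring the monitor into the earlier search calls might look like this; the feedback string is illustrative:

monitor = RAGMonitor()

results = knowledge.search("machine learning algorithms", top_k=5)
monitor.track_query("machine learning algorithms", results, user_feedback="helpful")

print(monitor.get_performance_metrics())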
# Fall back to lower thresholds when quality filtering returns too little
def search_with_fallback(knowledge, query):
    # First attempt: high quality threshold
    results = knowledge.search(
        query,
        quality_threshold=0.8,
        top_k=5
    )

    if len(results) < 3:
        # Fallback: relax the threshold
        results = knowledge.search(
            query,
            quality_threshold=0.6,
            top_k=5
        )

    if len(results) == 0:
        # Final fallback: no quality filter, fewer results
        results = knowledge.search(
            query,
            quality_threshold=0.0,
            top_k=3
        )

    return results