Documentation Index
Fetch the complete documentation index at: https://docs.praison.ai/llms.txt
Use this file to discover all available pages before exploring further.
Index Types Module
The index module provides different indexing strategies for document retrieval.
Quick Start
from praisonaiagents.knowledge.index import (
IndexType,
IndexStats,
IndexProtocol,
IndexRegistry,
get_index_registry,
KeywordIndex
)
# Use built-in keyword index (BM25, no external deps)
index = KeywordIndex()

# Add documents
index.add_documents([
    "Python is a programming language",
    "Machine learning with Python",
    "Java enterprise development",
])

# Query: ask for the two best matches, then print each hit's text and score
results = index.query("Python programming", top_k=2)
for result in results:
    print(f"{result['text']} (score: {result['score']})")
Index Types
IndexType Enum
from praisonaiagents.knowledge.index import IndexType
class IndexType(Enum):
    """Indexing strategies available for document retrieval."""

    VECTOR = "vector"    # Semantic similarity
    KEYWORD = "keyword"  # BM25 keyword matching
    HYBRID = "hybrid"    # Vector + Keyword combined
    GRAPH = "graph"      # Knowledge graph (placeholder)
Type Comparison
| Type | Method | Best For |
|---|---|---|
| vector | Semantic embeddings | Conceptual queries |
| keyword | BM25 term matching | Exact term queries |
| hybrid | Combined scoring | General purpose |
| graph | Entity relationships | Connected data |
Classes
IndexStats
Statistics about an index.
@dataclass
class IndexStats:
    """Statistics about an index."""

    document_count: int
    index_type: IndexType
    # Each instance gets its own empty dict unless one is supplied.
    metadata: Dict[str, Any] = field(default_factory=dict)
IndexProtocol
Protocol for index implementations.
class IndexProtocol(Protocol):
    """Protocol for index implementations.

    Declares the attributes and methods an index class is expected to
    provide: a ``name``, an ``index_type``, and the four methods below.
    """

    name: str
    index_type: IndexType

    def add_documents(
        self,
        documents: List[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None
    ) -> List[str]:
        """Add documents to the index."""
        ...

    def query(
        self,
        query: str,
        top_k: int = 10,
        **kwargs
    ) -> List[Dict[str, Any]]:
        """Query the index."""
        ...

    def get_stats(self) -> IndexStats:
        """Get index statistics."""
        ...

    def clear(self) -> None:
        """Clear the index."""
        ...
KeywordIndex
Built-in BM25 keyword index (no external dependencies).
from praisonaiagents.knowledge.index import KeywordIndex
index = KeywordIndex()

# Index three short documents
index.add_documents(
    [
        "Introduction to Python programming",
        "Advanced Python techniques",
        "Java for beginners",
    ]
)

# Ask for the two best matches; results come back ranked by BM25 score
results = index.query("Python", top_k=2)

# Inspect index statistics
stats = index.get_stats()
print(f"Documents: {stats.document_count}")
IndexRegistry
Registry for managing index implementations.
from praisonaiagents.knowledge.index import get_index_registry
registry = get_index_registry()
# List the names of the available indices
indices = registry.list_indices() # ['keyword', 'vector', ...]
# Look up an index by the name it was registered under
index = registry.get("keyword")
# Register a custom index under the name "custom"
# (MyIndex is the example class from "Creating Custom Indices" below)
registry.register("custom", MyIndex)
Using with Knowledge
from praisonaiagents import Agent, Knowledge
# Choose the index type through the agent's knowledge configuration
agent = Agent(
    instructions="You are a helpful assistant",
    knowledge={
        "sources": ["docs/"],
        "index_type": "hybrid",  # or "vector", "keyword"
    },
)

response = agent.chat("Find exact term 'API endpoint'")
Creating Custom Indices
from praisonaiagents.knowledge.index import (
IndexType,
IndexStats,
get_index_registry
)
from typing import List, Dict, Any, Optional
class MyIndex:
    """Skeleton of a custom index exposing the IndexProtocol interface.

    ``add_documents`` and ``query`` are left as placeholders for the
    implementer to fill in.
    """

    name = "my_index"
    index_type = IndexType.KEYWORD

    def __init__(self, **config):
        """Create an empty index; ``config`` is accepted but not used here."""
        self.documents = []

    def add_documents(
        self,
        documents: List[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None
    ) -> List[str]:
        """Add documents to the index (placeholder)."""
        # Implementation
        ...

    def query(
        self,
        query: str,
        top_k: int = 10,
        **kwargs
    ) -> List[Dict[str, Any]]:
        """Query the index (placeholder)."""
        # Implementation
        ...

    def get_stats(self) -> IndexStats:
        """Return the current document count together with the index type."""
        return IndexStats(
            document_count=len(self.documents),
            index_type=self.index_type
        )

    def clear(self) -> None:
        """Remove every stored document."""
        self.documents.clear()
# Register MyIndex with the registry, using the same key as its `name` attribute
registry = get_index_registry()
registry.register("my_index", MyIndex)
- KeywordIndex uses pure Python BM25 implementation
- No external dependencies for keyword indexing
- Vector indices require embedding models (lazy-loaded)