"""Core protocols for the PraisonAI knowledge stack.

Covers readers, vector stores, retrievers, rerankers, indices, and query engines.
"""
from praisonaiagents.knowledge import Document

# Build a document from its content, metadata and id; the embedding argument
# is optional.
doc = Document(
    content="Hello world",
    metadata={"source": "test.txt", "page": 1},
    doc_id="doc-123",
    embedding=[0.1, 0.2, 0.3],
)

# Round-trip the document through its dictionary form.
doc_dict = doc.to_dict()
doc_restored = Document.from_dict(doc_dict)
from praisonaiagents.knowledge import VectorRecord

# A stored vector entry: id, source text, embedding and metadata.
# `score` is optional and is filled in on records that come from query results.
record = VectorRecord(
    id="vec-123",
    text="Document content",
    embedding=[0.1, 0.2, 0.3],
    metadata={"source": "file.pdf"},
    score=0.95,
)
from praisonaiagents.knowledge import RetrievalResult

# One retrieved chunk: its text, score, metadata, the id of the document it
# belongs to and its chunk index.
result = RetrievalResult(
    text="Retrieved content",
    score=0.92,
    metadata={"source": "doc.pdf"},
    doc_id="doc-123",
    chunk_index=5,
)
from praisonaiagents.knowledge import RerankResult

# One reranked item: text, score, an `original_index` value and metadata.
# NOTE(review): `original_index` is presumably the item's position in the
# list passed to the reranker — confirm.
result = RerankResult(
    text="Reranked content",
    score=0.98,
    original_index=2,
    metadata={},
)
from praisonaiagents.knowledge import QueryResult

# A query engine answer together with its sources, the sub-questions that
# were asked, and free-form metadata.
result = QueryResult(
    answer="The answer based on context",
    sources=[{"text": "Source 1", "score": 0.9}],
    sub_questions=["What is X?", "What is Y?"],
    metadata={"mode": "sub_question"},
)
from praisonaiagents.knowledge import RetrievalStrategy

# Available retrieval strategies.
RetrievalStrategy.BASIC       # simple vector similarity
RetrievalStrategy.FUSION      # multi-query with RRF
RetrievalStrategy.RECURSIVE   # depth-limited recursive
RetrievalStrategy.AUTO_MERGE  # merge adjacent chunks
RetrievalStrategy.HYBRID      # vector + keyword
from praisonaiagents.knowledge import IndexType

# Available index types.
IndexType.VECTOR   # vector similarity index
IndexType.KEYWORD  # BM25 keyword index
IndexType.HYBRID   # combined vector + keyword
IndexType.GRAPH    # graph-based index
from praisonaiagents.knowledge import QueryMode

# Available query modes.
QueryMode.DEFAULT       # standard RAG query
QueryMode.SUB_QUESTION  # decompose complex questions
QueryMode.SQL           # SQL query generation
QueryMode.ROUTER        # route to appropriate handler
QueryMode.SUMMARIZE     # summarize retrieved context
from praisonaiagents.knowledge.vector_store import InMemoryVectorStore

store = InMemoryVectorStore()

# Insert two texts with their embeddings and metadata; the returned ids are
# reused for deletion below.
ids = store.add(
    texts=["Hello world", "Goodbye world"],
    embeddings=[[0.1, 0.2], [0.3, 0.4]],
    metadatas=[{"source": "a"}, {"source": "b"}],
)

# Similarity query against an embedding, restricted by a metadata filter.
results = store.query(
    embedding=[0.1, 0.2],
    top_k=5,
    filter={"source": "a"},
)

# Three ways to delete: by id, by metadata filter, or everything at once.
store.delete(ids=ids)
store.delete(filter={"source": "a"})
store.delete(delete_all=True)

# Look records up by id.
# NOTE(review): "id1"/"id2" are placeholder ids and the store was emptied
# just above, so this presumably returns no records — confirm.
records = store.get(ids=["id1", "id2"])

# Count the entries currently in the store.
count = store.count()
from praisonaiagents.knowledge import get_vector_store_registry

registry = get_vector_store_registry()

# Names of the registered stores, e.g. ['memory'].
stores = registry.list_stores()

# Look a store up by its registered name.
store = registry.get("memory")

# Register a custom store under a new name.
# NOTE(review): `MyCustomStore` is not defined in this snippet; it stands for
# a user-supplied store class and must exist before this line runs.
registry.register("custom", MyCustomStore)
# RetrievalResult is imported here so the example runs on its own, like the
# other snippets, instead of relying on an import made by an earlier example.
from praisonaiagents.knowledge import RetrievalResult
from praisonaiagents.knowledge.retrieval import reciprocal_rank_fusion

# Two result lists (e.g. from multiple queries) that both contain document "1".
list1 = [RetrievalResult(text="A", score=0.9, doc_id="1")]
list2 = [RetrievalResult(text="A", score=0.8, doc_id="1")]

# Fuse the lists into one.
# NOTE(review): k=60 is presumably the RRF rank constant — confirm.
fused = reciprocal_rank_fusion([list1, list2], k=60)
# RetrievalResult is imported here so the example runs on its own, like the
# other snippets, instead of relying on an import made by an earlier example.
from praisonaiagents.knowledge import RetrievalResult
from praisonaiagents.knowledge.retrieval import merge_adjacent_chunks

# Two consecutive chunks (indices 0 and 1) of the same document.
results = [
    RetrievalResult(text="Part 1", score=0.9, doc_id="1", chunk_index=0),
    RetrievalResult(text="Part 2", score=0.8, doc_id="1", chunk_index=1),
]

# Merge chunks that sit next to each other.
# NOTE(review): max_gap=1 presumably limits the allowed chunk_index distance
# between merged chunks — confirm.
merged = merge_adjacent_chunks(results, max_gap=1)
from praisonaiagents.knowledge.rerankers import SimpleReranker

reranker = SimpleReranker()

# Rerank the candidate documents against the query, requesting top_k=2.
results = reranker.rerank(
    query="Python programming",
    documents=["Python is great", "Java is different", "Python tutorial"],
    top_k=2,
)

# Print each result's text and score. The loop body must be indented;
# without the indent this snippet is an IndentationError.
for r in results:
    print(f"{r.text}: {r.score}")
from praisonaiagents.knowledge.index import KeywordIndex

index = KeywordIndex()

# Index two texts with one metadata dict each; the call returns ids.
ids = index.add_documents(
    texts=["Python programming", "Machine learning"],
    metadatas=[{"type": "tutorial"}, {"type": "guide"}],
)

# Keyword query, asking for up to five hits.
results = index.query("Python", top_k=5)

# Index statistics expose a document count and a total token count.
stats = index.stats()
print(f"Documents: {stats.document_count}, Tokens: {stats.total_tokens}")
from praisonaiagents.knowledge.query_engine import SimpleQueryEngine

engine = SimpleQueryEngine()

# Ask a question, supplying the context passages directly.
result = engine.query(
    "What is Python?",
    context=["Python is a programming language."],
)

# The result carries the answer and the sources behind it.
print(result.answer)
print(result.sources)
from praisonaiagents.knowledge.query_engine import SubQuestionEngine

engine = SubQuestionEngine()

# Ask a compound question, supplying the context passages directly.
result = engine.query(
    "What is Python and how to install it?",
    context=["Python is a language.", "Install with pip."],
)

# Expected sub-questions: ['What is Python?', 'How to install Python?']
print(result.sub_questions)
print(result.answer)
from praisonaiagents.knowledge.query_engine import decompose_question

# Split a compound question into its parts.
# Expected result: ['What is X?', 'What is Y?']
questions = decompose_question("What is X and what is Y?")
from praisonaiagents import Agent

# An agent given two knowledge sources and a vector store configuration
# that names the "chroma" provider and a collection called "my_kb".
agent = Agent(
    name="Knowledge Agent",
    instructions="Answer questions using the knowledge base",
    knowledge=["docs/manual.pdf", "data/faq.txt"],
    knowledge_config={
        "vector_store": {
            "provider": "chroma",
            "config": {"collection_name": "my_kb"},
        },
    },
)

# Send a question to the agent.
response = agent.chat("How do I authenticate?")