Set your OpenAI API key as an environment variable:
export OPENAI_API_KEY=your_api_key_here
Step 3: Create Script
Create a new file image_to_text.py:
"""Run an image-to-text agent over two images: text extraction, then description."""

from praisonaiagents import Agent, Task, PraisonAIAgents

# Create Image-to-Text Agent
image_text_agent = Agent(
    name="ImageTextConverter",
    role="Image Text Extraction Specialist",
    goal="Convert image content to textual descriptions and extract text",
    backstory=(
        "You are an expert in OCR and image understanding. "
        "You excel at extracting text from images and generating detailed descriptions."
    ),
    llm="gpt-4o-mini",
    self_reflect=False,
)

# Create text extraction task
# NOTE: "document.jpg" is a placeholder; replace it with your own image.
extraction_task = Task(
    name="extract_text",
    description="Extract all text from this image and describe its layout.",
    expected_output="Extracted text and layout description",
    agent=image_text_agent,
    images=["document.jpg"],
)

# Create description task
# NOTE: "scene.jpg" is a placeholder; replace it with your own image.
description_task = Task(
    name="generate_description",
    description="Generate a detailed description of the image content.",
    expected_output="Comprehensive description of visual elements",
    agent=image_text_agent,
    images=["scene.jpg"],
)

# Create PraisonAIAgents instance
agents = PraisonAIAgents(
    agents=[image_text_agent],
    tasks=[extraction_task, description_task],
    process="sequential",
    verbose=1,
)

# Run analysis
agents.start()
# Example: Processing a document image
# This snippet continues the image_to_text.py script: it reuses the
# `image_text_agent` object and the `Task` / `PraisonAIAgents` imports from it.
document_task = Task(
    name="process_document",
    description="Extract text and analyze document layout",
    expected_output="Extracted text with layout information",
    agent=image_text_agent,
    images=["business_document.jpg"],
)

# Run single task
agents = PraisonAIAgents(
    agents=[image_text_agent],
    tasks=[document_task],
    process="sequential",
)
agents.start()