# recipe.py
import os
import re
import json
from praisonaiagents import Agent, Task, AgentTeam
# Per-style guidance injected into the analyzer agent's instructions.
_STYLE_INSTRUCTIONS = {
    "concise": "Create short, punchy chapter titles (3-5 words). Focus on key topics only.",
    "detailed": "Create descriptive chapter titles with context. Include subtopics.",
}

# Only this many leading characters of the transcript are sent to the agent.
_MAX_TRANSCRIPT_CHARS = 5000


def _error(code: str, message: str) -> dict:
    """Build the failure payload shape returned by ``run``."""
    return {"ok": False, "error": {"code": code, "message": message}}


def _final_output_text(result, task_name: str) -> str:
    """Extract the text produced for ``task_name`` from the team's result.

    NOTE(review): the return shape of ``AgentTeam.start()`` is not visible
    from this file. A dict keyed by task name is handled as before; a plain
    string (or any other object) is treated as the final task's output.
    Confirm against the library documentation.
    """
    if isinstance(result, dict):
        result = result.get(task_name, "[]")
    if result is None:
        return "[]"
    return result if isinstance(result, str) else str(result)


def run(input_data: dict, config: dict = None) -> dict:
    """Generate YouTube chapters from transcript.

    Args:
        input_data: Request payload. Recognised keys:
            ``transcript_text`` - transcript to analyse;
            ``video_path`` - path to a video file (transcription is not
            implemented, so this alone always yields an error);
            ``style`` - ``"concise"`` (default) or ``"detailed"``.
        config: Accepted for interface compatibility; not read here.

    Returns:
        On success: ``{"ok": True, "chapters_json": [...],
        "description_text": str, "artifacts": [], "warnings": []}``.
        On failure: ``{"ok": False, "error": {"code": ..., "message": ...}}``
        with code ``MISSING_INPUT``, ``FILE_NOT_FOUND``, ``NOT_IMPLEMENTED``,
        ``INVALID_STYLE`` or ``PROCESSING_ERROR``.
    """
    transcript_text = input_data.get("transcript_text")
    video_path = input_data.get("video_path")
    # An explicit ``None``/empty style means "use the default".
    style = input_data.get("style") or "concise"

    if not transcript_text and not video_path:
        return _error("MISSING_INPUT", "Either transcript_text or video_path is required")

    # If video_path provided, extract transcript first
    if video_path and not transcript_text:
        if not os.path.exists(video_path):
            return _error("FILE_NOT_FOUND", f"Video not found: {video_path}")
        # Would integrate with transcription here
        return _error(
            "NOT_IMPLEMENTED",
            "Video transcription not implemented. Provide transcript_text.",
        )

    # Reject unknown styles up front instead of letting a KeyError surface
    # later as an opaque PROCESSING_ERROR.
    if not isinstance(style, str) or style not in _STYLE_INSTRUCTIONS:
        allowed = ", ".join(sorted(_STYLE_INSTRUCTIONS))
        return _error("INVALID_STYLE", f"Unknown style: {style!r}. Expected one of: {allowed}")

    try:
        # Create chapter analyzer agent
        analyzer = Agent(
            name="Content Analyzer",
            role="Video Content Specialist",
            goal="Identify logical chapter breaks in video content",
            instructions=(
                "\n"
                "You are a YouTube content specialist.\n"
                "- Identify major topic transitions\n"
                "- Find natural break points\n"
                f"- {_STYLE_INSTRUCTIONS[style]}\n"
                "- Ensure first chapter starts at 00:00\n"
                "- Aim for 5-15 chapters for typical videos\n"
            ),
        )

        # Create formatter agent
        formatter = Agent(
            name="Chapter Formatter",
            role="YouTube SEO Expert",
            goal="Format chapters for YouTube compatibility",
            instructions=(
                "\n"
                "You are a YouTube SEO expert.\n"
                "- Format timestamps as HH:MM:SS or MM:SS\n"
                "- Keep titles under 100 characters\n"
                "- Make titles searchable and descriptive\n"
                "- Ensure proper YouTube chapter format\n"
            ),
        )

        # Truncate for context. This is done outside the prompt string so
        # that no code comment leaks into the text sent to the model.
        transcript_excerpt = transcript_text[:_MAX_TRANSCRIPT_CHARS]

        # Define tasks
        analyze_task = Task(
            name="analyze_content",
            description=(
                "\n"
                "Analyze this transcript and identify chapter breaks:\n"
                f"{transcript_excerpt}\n"
                "Identify 5-15 logical chapter points with timestamps.\n"
            ),
            expected_output="List of chapter points with timestamps and topics",
            agent=analyzer,
        )
        format_task = Task(
            name="format_chapters",
            description=(
                "\n"
                "Format the chapters for YouTube:\n"
                "- Start with 00:00\n"
                "- Use consistent timestamp format\n"
                "- Create engaging titles\n"
                'Output as JSON array: [{"timestamp": "00:00", "title": "Introduction"}, ...]\n'
            ),
            expected_output="JSON array of formatted chapters",
            agent=formatter,
            context=[analyze_task],
        )

        # Execute
        agents = AgentTeam(
            agents=[analyzer, formatter],
            tasks=[analyze_task, format_task],
        )
        result = agents.start()

        # Parse chapters
        chapters_text = _final_output_text(result, "format_chapters")
        chapters = parse_chapters(chapters_text)

        # Generate YouTube description
        description = generate_description(chapters)

        return {
            "ok": True,
            "chapters_json": chapters,
            "description_text": description,
            "artifacts": [],
            "warnings": [],
        }
    except Exception as e:
        # Recipe boundary: callers expect an error payload, never a raise.
        return _error("PROCESSING_ERROR", str(e))
# One chapter per line: optional list bullet/number, a MM:SS or HH:MM:SS
# timestamp, then either an explicit separator or plain whitespace, then the
# title. Plain whitespace covers the standard YouTube form "00:00 Title".
_CHAPTER_LINE_RE = re.compile(
    r'(?:[-*•]\s*|\d+[.)]\s+)?'
    r'(\d{1,2}:\d{2}(?::\d{2})?)'
    r'(?:\s*[-–—:|]\s*|\s+)'
    r'(.+)'
)


def _valid_chapters(value) -> list:
    """Keep only dict entries that carry both ``timestamp`` and ``title``."""
    if not isinstance(value, list):
        return []
    return [
        entry
        for entry in value
        if isinstance(entry, dict) and "timestamp" in entry and "title" in entry
    ]


def parse_chapters(text: str) -> list:
    """Parse chapters from agent output.

    Two strategies are tried in order:

    1. Find a JSON array embedded anywhere in ``text`` and keep its entries
       that are dicts with ``timestamp`` and ``title`` keys.
    2. Otherwise read ``text`` line by line, accepting lines that start with
       a timestamp followed by a title.

    Args:
        text: Raw agent output. ``None`` is treated as empty; other
            non-string values are converted with ``str()``.

    Returns:
        A list of chapter dicts, each with at least ``timestamp`` and
        ``title``; empty when nothing could be parsed.
    """
    if text is None:
        return []
    if not isinstance(text, str):
        text = str(text)

    # Try JSON parse first. Decoding is attempted from every '[' so that
    # unrelated brackets earlier in the output do not hide the array.
    decoder = json.JSONDecoder()
    for bracket in re.finditer(r'\[', text):
        try:
            value, _ = decoder.raw_decode(text, bracket.start())
        except json.JSONDecodeError:
            continue
        chapters = _valid_chapters(value)
        if chapters:
            return chapters

    # Fallback: parse timestamp lines
    chapters = []
    for line in text.split('\n'):
        match = _CHAPTER_LINE_RE.match(line.strip())
        if match:
            chapters.append({
                "timestamp": match.group(1),
                "title": match.group(2).strip(),
            })
    return chapters
def generate_description(chapters: list) -> str:
    """Generate YouTube-compatible description.

    Args:
        chapters: Chapter dicts, each expected to have ``timestamp`` and
            ``title`` keys. ``None`` is treated as an empty list.

    Returns:
        A header line, a blank line, then one ``"<timestamp> <title>"`` line
        per chapter, joined with newlines. Entries that are not dicts or
        that lack either key are skipped rather than raising.
    """
    lines = ["📚 Chapters:", ""]
    lines.extend(
        f"{ch['timestamp']} {ch['title']}"
        for ch in chapters or []
        if isinstance(ch, dict) and "timestamp" in ch and "title" in ch
    )
    return '\n'.join(lines)