"""Extract OpenAI model pricing from the public pricing page using crawl4ai.

The script crawls ``https://openai.com/api/pricing/``, hands the page to an
LLM-based extraction strategy constrained by the ``OpenAIModelFee`` schema,
and prints the extracted content to stdout.

The OpenAI API token is read from the ``OPENAI_API_KEY`` environment variable.
"""
import os

from crawl4ai import WebCrawler
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from pydantic import BaseModel, Field


# Shape of one extracted pricing record. Documented with comments rather than
# a class docstring on purpose: pydantic copies a model's docstring into the
# generated JSON schema, which would change what is sent to the LLM.
class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
    input_fee: str = Field(
        ..., description="Fee for input token for the OpenAI model."
    )
    # Fixed: the description previously contained a stray garbled character
    # before "for", which leaked into the schema given to the LLM.
    output_fee: str = Field(
        ..., description="Fee for output token for the OpenAI model."
    )


url = 'https://openai.com/api/pricing/'

crawler = WebCrawler()
crawler.warmup()

result = crawler.run(
    url=url,
    word_count_threshold=1,
    extraction_strategy=LLMExtractionStrategy(
        provider="openai/gpt-4o",
        # os.getenv returns None when the variable is unset.
        api_token=os.getenv('OPENAI_API_KEY'),
        # NOTE(review): `.schema()` is the pydantic v1-style accessor; kept
        # as-is because the installed pydantic version is not visible here.
        schema=OpenAIModelFee.schema(),
        extraction_type="schema",
        # Adjacent literals concatenate to the exact original prompt text.
        instruction=(
            "From the crawled content, extract all mentioned model names "
            "along with their fees for input and output tokens. Do not miss "
            "any models in the entire content. One extracted model JSON "
            "format should look like this: "
            '{"model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", '
            '"output_fee": "US$30.00 / 1M tokens"}.'
        ),
    ),
    # Always fetch the live page instead of a previously cached result.
    bypass_cache=True,
)

print(result.extracted_content)
# Agent configuration: a single web-scraper role that gathers model pricing.
framework: crewai
topic: extract model pricing from websites
roles:
  web_scraper:
    # The pricing pages to visit are listed inline at the end of the backstory.
    backstory: An expert in web scraping with a deep understanding of extracting structured
      data from online sources. https://openai.com/api/pricing/ https://www.anthropic.com/pricing
      https://cohere.com/pricing
    goal: Gather model pricing data from various websites
    role: Web Scraper
    tasks:
      scrape_model_pricing:
        description: Scrape model pricing information from the provided list of websites.
        expected_output: Raw HTML or JSON containing model pricing data.
    # NOTE(review): 'ModelFeeTool' is referenced by name only and its definition
    # is not visible here. The nesting of `tools` under the role (not the task)
    # was reconstructed from key order - confirm both against the framework.
    tools:
    - 'ModelFeeTool'
dependencies: []