import re

from firecrawl import FirecrawlApp
from praisonai_tools import BaseTool

# Text up to and including the first occurrence of this divider is discarded.
_HEADER_DIVIDER = "========================================================"

# Upper bound on the number of whitespace-separated words returned.
_MAX_WORDS = 1000

# Markdown link that is a bullet-list item, e.g. "*   [Home](/)".
_BULLET_LINK_RE = re.compile(r'\*\s+\[.*?\]\(.*?\)')
# The "[Skip to the content](...)" accessibility link.
_SKIP_LINK_RE = re.compile(r'\[Skip to the content\]\(.*?\)')
# Any remaining markdown link "[text](target)".
_MARKDOWN_LINK_RE = re.compile(r'\[.*?\]\(.*?\)')
# Navigation labels, matched as whole words only. The surrounding whitespace
# is deliberately NOT consumed: swallowing it would glue the neighbouring
# words/lines together ("Intro\nMenu\nBody" -> "IntroBody"), and matching
# without word boundaries would mangle words such as "Searching" or "Menus".
_NAV_LABEL_RE = re.compile(r'\b(?:Menu|Search|Categories)\b')
# Bare URLs (anything starting with "http" up to the next whitespace).
_URL_RE = re.compile(r'http\S+')


def _clean_content(content: str) -> str:
    """Strip navigation noise from scraped page text and cap its length.

    Steps, in order: drop everything before the header divider (when
    present), remove markdown links and navigation labels, remove bare URLs,
    drop blank lines, and finally truncate to the first ``_MAX_WORDS`` words.

    Args:
        content: Raw text of the scraped page.

    Returns:
        The cleaned text. When truncation happens the kept words are joined
        with single spaces, so line breaks are lost in that case only.
    """
    # Keep only what follows the first divider line, if there is one.
    _, divider, tail = content.partition(_HEADER_DIVIDER)
    if divider:
        content = tail

    # Remove menu items and similar patterns. Bullet links go first so the
    # leading "*" marker is removed together with the link.
    for pattern in (
        _BULLET_LINK_RE,
        _SKIP_LINK_RE,
        _MARKDOWN_LINK_RE,
        _NAV_LABEL_RE,
        _URL_RE,
    ):
        content = pattern.sub('', content)

    # Drop empty lines and lines that contain only whitespace (this also
    # removes lines that held nothing but a navigation label or a link).
    content = '\n'.join(
        line for line in content.split('\n') if line.strip()
    )

    # Limit the result to the first _MAX_WORDS words.
    words = content.split()
    if len(words) > _MAX_WORDS:
        content = ' '.join(words[:_MAX_WORDS])

    return content


class WebPageScraperTool(BaseTool):
    """Tool that scrapes a web page through Firecrawl and returns cleaned text."""

    name: str = "Web Page Scraper Tool"
    description: str = "Scrape and extract information from a given web page URL."

    def _run(self, url: str) -> str:
        """Scrape ``url`` and return its cleaned, length-limited text.

        Args:
            url: Address of the page to scrape.

        Returns:
            The page text with links, navigation labels, bare URLs and blank
            lines removed, limited to the first 1000 words.
        """
        # Talks to a Firecrawl API served at localhost:3002.
        app = FirecrawlApp(api_url='http://localhost:3002')
        response = app.scrape_url(url=url)
        # NOTE(review): assumes the scrape response is subscriptable and
        # carries a "content" entry holding the page text - confirm against
        # the installed Firecrawl SDK version.
        content = response["content"]
        return _clean_content(content)