from curl_cffi import requests as req
from bs4 import BeautifulSoup
import html2text


def scrape_to_markdown(
    url: str,
    *,
    impersonate: str = "chrome110",
    timeout: float = 30,
) -> str:
    """Fetch a webpage and return its content converted to Markdown.

    The page is downloaded with ``curl_cffi``, parsed with BeautifulSoup's
    ``html.parser``, stripped of non-content elements, and the remaining
    HTML is handed to ``html2text``.

    Args:
        url: The URL of the webpage to scrape.
        impersonate: Browser profile name forwarded to ``curl_cffi``'s
            ``impersonate`` argument. Defaults to ``"chrome110"``, the value
            this function has always used.
        timeout: Seconds to wait for the request before giving up,
            forwarded to ``curl_cffi``.

    Returns:
        The webpage content converted to Markdown.

    Note:
        The HTTP status code is not checked: the body of an error response
        (e.g. a 404 page) is converted like any other page. Callers that
        need to distinguish failures must validate the URL separately.
    """
    # Fetch HTML content.
    response = req.get(url, impersonate=impersonate, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop elements that carry no readable content. 'css' is not a standard
    # HTML element; it is kept so non-standard markup is still cleaned the
    # same way as before.
    for tag in soup(['script', 'style', 'noscript', 'svg', 'css']):
        tag.decompose()

    # Serialize the cleaned tree and convert it to Markdown.
    return html2text.html2text(str(soup))