bcci committed on
Commit
06eae38
·
verified ·
1 Parent(s): ed4b796

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -31,8 +31,8 @@ async def scraper(url):
31
 
32
  async def text_scraper(url):
33
  """Fetches HTML content using AsyncFetcher and then extracts text."""
34
- html = await async_fetcher.get(url)
35
- return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html.get_all_text())).strip()
36
 
37
 
38
  async def convert_html_to_md(html):
@@ -76,8 +76,10 @@ async def get_text_get(request: Request, url: str):
76
  if not full_url.startswith(('http://', 'https://')):
77
  full_url = f"http://{full_url}"
78
 
79
- markdown_output = await text_scraper(full_url)
80
- return PlainTextResponse(markdown_output)
 
 
81
  except Exception as e:
82
  raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
83
 
 
31
 
32
  async def text_scraper(url):
33
  """Fetches HTML content using AsyncFetcher and then extracts text."""
34
+ html = await async_fetcher.get(url).get_all_text()
35
+ return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html)).strip()
36
 
37
 
38
  async def convert_html_to_md(html):
 
76
  if not full_url.startswith(('http://', 'https://')):
77
  full_url = f"http://{full_url}"
78
 
79
+ print(full_url)
80
+
81
+ text_output = await text_scraper(full_url)
82
+ return PlainTextResponse(text_output)
83
  except Exception as e:
84
  raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
85