Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -31,8 +31,8 @@ async def scraper(url):
|
|
31 |
|
32 |
async def text_scraper(url):
|
33 |
"""Fetches HTML content using AsyncFetcher and than extract text."""
|
34 |
-
html = await async_fetcher.get(url)
|
35 |
-
return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html
|
36 |
|
37 |
|
38 |
async def convert_html_to_md(html):
|
@@ -76,8 +76,10 @@ async def get_text_get(request: Request, url: str):
|
|
76 |
if not full_url.startswith(('http://', 'https://')):
|
77 |
full_url = f"http://{full_url}"
|
78 |
|
79 |
-
|
80 |
-
|
|
|
|
|
81 |
except Exception as e:
|
82 |
raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
|
83 |
|
|
|
31 |
|
32 |
async def text_scraper(url):
|
33 |
"""Fetches HTML content using AsyncFetcher and than extract text."""
|
34 |
+
html = await async_fetcher.get(url).get_all_text()
|
35 |
+
return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html)).strip()
|
36 |
|
37 |
|
38 |
async def convert_html_to_md(html):
|
|
|
76 |
if not full_url.startswith(('http://', 'https://')):
|
77 |
full_url = f"http://{full_url}"
|
78 |
|
79 |
+
print(full_url)
|
80 |
+
|
81 |
+
text_output = await text_scraper(full_url)
|
82 |
+
return PlainTextResponse(text_output)
|
83 |
except Exception as e:
|
84 |
raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
|
85 |
|