Spaces:

bcci
/

reader-api

Sleeping

bcci committed on Feb 11

Commit

06eae38

verified ·

1 Parent(s): ed4b796

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,8 +31,8 @@ async def scraper(url):
 async def text_scraper(url):
     """Fetches HTML content using AsyncFetcher and than extract text."""
-    html = await async_fetcher.get(url)
-    return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html.get_all_text())).strip()
 async def convert_html_to_md(html):
@@ -76,8 +76,10 @@ async def get_text_get(request: Request, url: str):
         if not full_url.startswith(('http://', 'https://')):
             full_url = f"http://{full_url}"
-        markdown_output = await text_scraper(full_url)
-        return PlainTextResponse(markdown_output)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")

 async def text_scraper(url):
     """Fetches HTML content using AsyncFetcher and than extract text."""
+    html = await async_fetcher.get(url).get_all_text()
+    return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html)).strip()
 async def convert_html_to_md(html):
         if not full_url.startswith(('http://', 'https://')):
             full_url = f"http://{full_url}"
+        print(full_url)
+        text_output = await text_scraper(full_url)
+        return PlainTextResponse(text_output)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")