bcci committed on
Commit
51bbabe
·
verified ·
1 Parent(s): be31cd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -18
app.py CHANGED
@@ -26,28 +26,21 @@ def convert_html_to_md(html):
26
  os.remove(temp_file_path)
27
  return x
28
 
29
- # POST endpoint to /reader expecting URL in JSON body
30
- @app.post("/reader", response_class=PlainTextResponse)
31
- async def get_markdown_post(request: Request):
32
- try:
33
- request_data = await request.json()
34
- url = request_data.get("url")
35
- if not url:
36
- raise HTTPException(status_code=400, detail="Please provide a URL in the request body as JSON: {'url': 'your_url'}")
37
- decoded_url = unquote(url)
38
- markdown_output = convert_html_to_md(scraper(decoded_url))
39
- return PlainTextResponse(markdown_output)
40
- except HTTPException as http_exc:
41
- raise http_exc
42
- except Exception as e:
43
- raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
44
-
45
  # GET endpoint to /read/{url:path} expecting URL in path
46
  @app.get("/read/{url:path}", response_class=PlainTextResponse)
47
  async def get_markdown_get(url: str):
48
  try:
49
- decoded_url = unquote(url) # URL in path needs unquoting as well
50
- markdown_output = convert_html_to_md(scraper(decoded_url))
 
 
 
 
 
 
 
 
 
51
  return PlainTextResponse(markdown_output)
52
  except Exception as e:
53
  raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")
 
26
  os.remove(temp_file_path)
27
  return x
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # GET endpoint to /read/{url:path} expecting URL in path
30
  @app.get("/read/{url:path}", response_class=PlainTextResponse)
31
  async def get_markdown_get(url: str):
32
  try:
33
+ # Retrieve the full path from the request
34
+ full_url = str(request.url)
35
+
36
+ # Extract the part of the URL after `/read/`
37
+ full_url = full_url.split("/read/")[1]
38
+
39
+ # Additional optional URL validation if needed
40
+ if not full_url.startswith(('http://', 'https://')):
41
+ full_url = f"http://{full_url}"
42
+
43
+ markdown_output = convert_html_to_md(scraper(full_url))
44
  return PlainTextResponse(markdown_output)
45
  except Exception as e:
46
  raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")