from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import PlainTextResponse
import uvicorn
from scrapling import AsyncFetcher, StealthyFetcher
from markitdown._markitdown import HtmlConverter
import asyncio
import re

app = FastAPI()
async_fetcher = AsyncFetcher(auto_match=True)  # AsyncFetcher for plain async HTTP fetches
stealthy_fetcher = StealthyFetcher()  # StealthyFetcher handles its own async/browser internally
md = HtmlConverter()


async def stealthy_scraper(url):
    """Fetches HTML content using StealthyFetcher's async API."""
    html = await stealthy_fetcher.async_fetch(url)
    return html.html_content


async def scraper(url):
    """Fetches HTML content using AsyncFetcher."""
    html = await async_fetcher.get(url)
    return html.html_content


async def text_scraper(url):
    """Fetches HTML content using AsyncFetcher and then extracts whitespace-normalized text."""
    html = await async_fetcher.get(url)
    return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html.get_all_text())).strip()


async def convert_html_to_md(html):
    """Converts HTML to Markdown using MarkItDown (its conversion is synchronous, so run it in a thread)."""
    md_text = await asyncio.to_thread(md._convert, html)
    return md_text.text_content


@app.get("/read/{url:path}", response_class=PlainTextResponse)
async def get_markdown_get(request: Request, url: str):
    """Handles GET requests to /read/{url}, returning Markdown content."""
    try:
        # Take everything after /read/ from the raw request URL so query strings are preserved.
        full_url = str(request.url).split("/read/")[1]
        if not full_url.startswith(('http://', 'https://')):
            full_url = f"http://{full_url}"
        markdown_output = await convert_html_to_md(await scraper(full_url))
        return PlainTextResponse(markdown_output)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")


@app.get("/read_text/{url:path}", response_class=PlainTextResponse)
async def get_text_get(request: Request, url: str):
    """Handles GET requests to /read_text/{url}, returning plain-text content."""
    try:
        full_url = str(request.url).split("/read_text/")[1]
        if not full_url.startswith(('http://', 'https://')):
            full_url = f"http://{full_url}"
        text_output = await text_scraper(full_url)
        return PlainTextResponse(text_output)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")


@app.get("/reader/{url:path}", response_class=PlainTextResponse)
async def get_markdown_get_stealthy(request: Request, url: str):
    """Handles GET requests to /reader/{url}, using StealthyFetcher."""
    try:
        full_url = str(request.url).split("/reader/")[1]
        if not full_url.startswith(('http://', 'https://')):
            full_url = f"http://{full_url}"
        markdown_output = await convert_html_to_md(await stealthy_scraper(full_url))
        return PlainTextResponse(markdown_output)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing URL: {e}")


if __name__ == "__main__":
    async def run_app():
        # Fetching the Camoufox browser is only needed for StealthyFetcher to work.
        try:
            process = await asyncio.create_subprocess_exec('camoufox', 'fetch')
            await process.wait()  # Wait for camoufox to finish downloading/setting up its browser
            print("Camoufox initialized successfully!")
        except Exception as e:
            print(f"An unexpected error occurred starting camoufox: {e}")
        config = uvicorn.Config(app, host="0.0.0.0", port=7860)
        server = uvicorn.Server(config)
        await server.serve()

    asyncio.run(run_app())
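
# Example usage (a sketch, assuming the server is running locally with the
# host/port configured above; adjust the hostname/port if you change them):
#
#   curl http://localhost:7860/read/example.com        # page as Markdown (AsyncFetcher)
#   curl http://localhost:7860/read_text/example.com   # page as whitespace-normalized text
#   curl http://localhost:7860/reader/example.com      # page as Markdown (StealthyFetcher/Camoufox)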