Spaces:

Rivalcoder
/

Youtube_Dockor

Runtime error

File size: 2,069 Bytes

5427076
cbf58a5
 
 
 
 
5427076
cbf58a5
 
5427076
cbf58a5
5427076
 
 
cbf58a5
 
 
 
5427076
cbf58a5
 
a0bdeee
 
cbf58a5
a0bdeee
cbf58a5
a0bdeee
cbf58a5
 
 
 
 
5427076
 
 
 
 
cbf58a5
 
 
 
 
a0bdeee
cbf58a5
 
5427076
 
cbf58a5
 
5427076
39ddda1
 
5427076
cbf58a5
 
5427076

import os
import time
import gradio as gr
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc

# Function to extract YouTube captions using a headless browser
def get_captions_selenium(video_url):
    """Load a video page in headless Chromium and return its caption-track snippet.

    The page is opened with undetected-chromedriver, given a fixed five
    seconds to render, and its HTML source is searched for the
    ``captionTracks`` marker. The returned snippet is the raw substring from
    that marker through the first ``]`` that follows it; it is not parsed or
    validated here, and a nested array inside it would cut the slice short.

    Args:
        video_url: URL of the video page to open. Surrounding whitespace is
            ignored.

    Returns:
        A human-readable status string: the snippet prefixed with a success
        banner, a warning when no caption information could be located, or
        an error message (starting with "❌ Error:") when the URL is blank or
        the browser session fails. Exceptions from the browser are reported
        in the return value rather than raised.
    """
    url = str(video_url or "").strip()
    if not url:
        # Fail fast: launching a browser just to load an empty URL is wasted work.
        return "❌ Error: no video URL provided."

    driver = None
    try:
        print("🚀 Launching Chromium via undetected-chromedriver...")
        options = uc.ChromeOptions()
        # Point to the system-installed Chromium binary
        options.binary_location = os.environ.get("CHROME_BINARY", "/usr/bin/chromium")
        options.add_argument("--headless=new")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        driver = uc.Chrome(options=options)
        print("🌍 Navigating to video URL...")
        driver.get(url)

        print("⌛ Waiting for page to load...")
        time.sleep(5)

        print("📄 Scraping page source...")
        page_source = driver.page_source
    except Exception as e:
        print(f"❌ Exception occurred: {e}")
        return f"❌ Error: {str(e)}"
    finally:
        # Always shut the browser down, including on navigation/scrape errors,
        # so failed requests do not leave Chromium processes behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                # A failed shutdown should not mask the result we already have.
                print(f"⚠️ Failed to shut down the browser cleanly: {quit_error}")

    start = page_source.find("captionTracks")
    # Only look for the closing bracket when the marker exists; a missing
    # bracket is treated like a missing marker instead of yielding an empty slice.
    closing = page_source.find("]", start) if start != -1 else -1
    if closing == -1:
        return "⚠️ Captions info not found in source. May not be available or blocked."

    caption_json = page_source[start:closing + 1]
    return (
        "✅ Found potential captions info.\n"
        "(You can parse this JSON string to extract subtitles.)\n\n"
        + caption_json
    )

# Gradio interface definition
default_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

# Single text field, pre-filled with the default video so the demo works out of the box.
url_input = gr.Textbox(value=default_url, label="YouTube Video URL")

app_description = (
    "Extract captions from a YouTube video using a headless browser with "
    "undetected-chromedriver. Logs will appear in the Space's console."
)

# Build the UI around the scraper function, then start serving it.
demo = gr.Interface(
    fn=get_captions_selenium,
    inputs=[url_input],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description=app_description,
)
demo.launch()