import time

import gradio as gr
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc

# Fixed delay (seconds) given to the page to finish rendering before its
# source is inspected.
_PAGE_LOAD_WAIT_SECONDS = 5


def get_captions_selenium(video_url):
    """Report whether a YouTube page exposes caption-track metadata.

    Opens ``video_url`` in a headless Chrome session, waits a fixed delay,
    and checks whether the marker ``"captionTracks"`` occurs anywhere in the
    page source. The caption data itself is not extracted or parsed.

    Args:
        video_url: URL of the video page to load.

    Returns:
        A human-readable status string: a success message when the marker is
        present, a warning when it is absent, or an ``"❌ Error: ..."``
        message when the URL is blank or any step of the browser session
        fails. Exceptions derived from ``Exception`` are reported through
        the return value rather than raised.
    """
    # Reject blank input up front instead of starting a browser that is
    # bound to fail on navigation.
    if not video_url or not video_url.strip():
        return "❌ Error: please provide a YouTube video URL."
    url = video_url.strip()

    try:
        options = uc.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = uc.Chrome(options=options)

        # Once the browser exists it must be shut down on every path,
        # including failures while loading or reading the page.
        try:
            driver.get(url)
            time.sleep(_PAGE_LOAD_WAIT_SECONDS)
            # NOTE: clicking through the "..." -> "Open transcript" menu is
            # not implemented (YouTube's UI changes often); only the raw
            # page source is inspected.
            page_source = driver.page_source
        finally:
            driver.quit()

        if "captionTracks" in page_source:
            return "✅ Found potential captions info in page source (you may need to parse this JSON)."
        return "⚠️ Captions info not found in source. May not be available or blocked."
    except Exception as e:
        # Top-level UI boundary: surface the failure as text in the output
        # box instead of propagating it to the caller.
        return f"❌ Error: {e}"


# Gradio interface
gr.Interface(
    fn=get_captions_selenium,
    inputs=[gr.Textbox(label="YouTube Video URL")],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description="Extract captions using headless browser via Selenium.",
).launch()