File size: 1,500 Bytes
cbf58a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import time
import gradio as gr
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc

def get_captions_selenium(video_url):
    """Report whether a video page exposes caption-track metadata.

    Opens ``video_url`` in a headless Chrome session, waits a fixed five
    seconds for the page to load, and searches the page source for the
    ``captionTracks`` marker. The caption data itself is not parsed; only
    its presence is reported.

    Args:
        video_url: URL of the video page to open.

    Returns:
        A human-readable status string. Failures (including an empty URL or
        any browser error) are reported as a string starting with
        ``"❌ Error:"`` rather than raised, so the result can be shown
        directly in the UI.
    """
    # Reject unusable input up front instead of paying for a browser launch
    # that is guaranteed to fail.
    if not isinstance(video_url, str) or not video_url.strip():
        return "❌ Error: Please provide a YouTube video URL."
    url = video_url.strip()

    try:
        # Launch browser
        options = uc.ChromeOptions()
        for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
            options.add_argument(flag)

        driver = uc.Chrome(options=options)
        try:
            driver.get(url)
            # Fixed wait for the page to finish loading its scripts.
            # YouTube's UI changes often; this may need tuning.
            time.sleep(5)
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when navigation fails,
            # so Chrome processes are not leaked on errors.
            driver.quit()
    except Exception as e:
        return f"❌ Error: {e}"

    # Look for subtitle metadata in the page source (for auto-generated).
    if "captionTracks" in page_source:
        return "✅ Found potential captions info in page source (you may need to parse this JSON)."
    return "⚠️ Captions info not found in source. May not be available or blocked."

# Build the Gradio UI: one URL text box in, one text status message out,
# then start serving it.
demo = gr.Interface(
    fn=get_captions_selenium,
    inputs=[gr.Textbox(label="YouTube Video URL")],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description="Extract captions using headless browser via Selenium.",
)
demo.launch()