Youtube_Dockor / app.py
Rivalcoder
Add
a0bdeee
raw
history blame
1.61 kB
import time
import gradio as gr
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
def _extract_caption_tracks(page_source):
    """Return the ``captionTracks`` fragment of *page_source*, or ``None``.

    The fragment starts at the first occurrence of the text
    ``captionTracks`` and runs through the end of the array that follows it.

    When the key is immediately followed by a JSON array
    (``captionTracks":[ ... ]``) the array is delimited with the JSON
    decoder, so nested arrays and ``]`` characters inside string values do
    not cut the fragment short. If that is not possible, the fragment ends
    at the first ``]`` after the key. ``None`` is returned when the key, or
    any closing bracket after it, is missing.
    """
    import json  # local import so this block does not depend on other edits

    key = "captionTracks"
    start = page_source.find(key)
    if start == -1:
        return None

    array_start = page_source.find("[", start)
    if array_start != -1:
        separator = page_source[start + len(key):array_start]
        # Only trust the decoder when the bracket really belongs to the key.
        if separator.strip() == '":':
            try:
                _, array_end = json.JSONDecoder().raw_decode(
                    page_source, array_start
                )
            except ValueError:
                # Not valid JSON from here on; fall through to the plain
                # first-bracket cut below instead of failing.
                pass
            else:
                return page_source[start:array_end]

    first_close = page_source.find("]", start)
    if first_close == -1:
        return None
    return page_source[start:first_close + 1]


def get_captions_selenium(video_url):
    """Load a YouTube page in headless Chrome and return its caption info.

    Args:
        video_url: URL of the video page to open.

    Returns:
        A human-readable string, one of:
        * a success message followed by the raw ``captionTracks`` fragment
          found in the page source,
        * a warning that no caption info was found,
        * an error message describing the exception that occurred (or that
          no URL was supplied).

    The function does not raise for browser or navigation failures; they are
    reported through the returned string. The browser is always shut down,
    including when an error occurs part-way through.
    """
    # Reject unusable input up front rather than starting a browser for it.
    if not isinstance(video_url, str) or not video_url.strip():
        return "❌ Error: No video URL provided."
    video_url = video_url.strip()

    driver = None
    try:
        print("🚀 Launching Chrome...")
        options = uc.ChromeOptions()
        options.add_argument("--headless=new")  # Use 'new' headless mode for Chrome 109+
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = uc.Chrome(options=options)

        print("🌍 Navigating to video...")
        driver.get(video_url)

        print("⌛ Waiting for page to load...")
        # Fixed pause to give the page's scripts time to populate the source.
        time.sleep(5)

        print("📄 Scraping page source...")
        caption_json = _extract_caption_tracks(driver.page_source)
    except Exception as e:
        print(f"❌ Exception occurred: {e}")
        return f"❌ Error: {e}"
    finally:
        # Runs on every path so a failed scrape cannot leak a Chrome process.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                print(f"⚠️ Failed to close Chrome cleanly: {quit_error}")

    if caption_json is None:
        return "⚠️ Captions info not found in source. May not be available or blocked."
    return (
        "✅ Found potential captions info in page source "
        "(you may need to parse this JSON).\n\n" + caption_json
    )
# Gradio UI: a single URL textbox feeds get_captions_selenium and the
# returned string is shown as plain text.
url_input = gr.Textbox(label="YouTube Video URL")
demo = gr.Interface(
    fn=get_captions_selenium,
    inputs=[url_input],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description="Uses Selenium with undetected-chromedriver to extract captions from a YouTube video.",
)
# Start serving as soon as the file is executed.
demo.launch()