[FIX] Improve transcript extraction: fall back to the first available caption track when the requested language is missing
Browse files
tool.py
CHANGED
@@ -80,12 +80,13 @@ class YouTubeTranscriptExtractor(Tool):
|
|
80 |
# Create a YouTube object
|
81 |
yt = YouTube(video_url)
|
82 |
lang='en'
|
83 |
-
|
84 |
-
|
85 |
-
transcript = yt.captions[
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
89 |
|
90 |
# Clean up the transcript by removing timestamps and line numbers
|
91 |
cleaned_transcript = ""
|
|
|
80 |
# Create a YouTube object
|
81 |
yt = YouTube(video_url)
|
82 |
lang='en'
|
83 |
+
# Get the video transcript in the requested language
|
84 |
+
try:
|
85 |
+
transcript = yt.captions[lang].generate_srt_captions()
|
86 |
+
except KeyError:
|
87 |
+
# If the requested language is not available, fall back to the first available caption track (note: next() raises StopIteration if the video has no captions at all)
|
88 |
+
transcript = next(iter(yt.captions.values())).generate_srt_captions()
|
89 |
+
lang = next(iter(yt.captions.keys()))
|
90 |
|
91 |
# Clean up the transcript by removing timestamps and line numbers
|
92 |
cleaned_transcript = ""
|