VPCSinfo committed on
Commit
f279b36
·
1 Parent(s): b984651

[FIX] improve transcript extraction by handling missing language captions

Browse files
Files changed (1) hide show
  1. tool.py +7 -6
tool.py CHANGED
@@ -80,12 +80,13 @@ class YouTubeTranscriptExtractor(Tool):
80
  # Create a YouTube object
81
  yt = YouTube(video_url)
82
  lang='en'
83
- # Get the video transcript
84
- if lang in yt.captions:
85
- transcript = yt.captions['en'].generate_srt_captions()
86
- else:
87
- transcript = yt.captions.all()[0].generate_srt_captions()
88
- lang = yt.captions.all()[0].code
 
89
 
90
  # Clean up the transcript by removing timestamps and line numbers
91
  cleaned_transcript = ""
 
80
  # Create a YouTube object
81
  yt = YouTube(video_url)
82
  lang='en'
83
+ # Get the video transcript
84
+ try:
85
+ transcript = yt.captions[lang].generate_srt_captions()
86
+ except KeyError:
87
+ # If the specified language is not available, get the first available transcript
88
+ transcript = next(iter(yt.captions.values())).generate_srt_captions()
89
+ lang = next(iter(yt.captions.keys()))
90
 
91
  # Clean up the transcript by removing timestamps and line numbers
92
  cleaned_transcript = ""