Spaces:

KDM999
/

asr-multimodel-comparison

Running

KDM999 committed 23 days ago

Commit

a368f0c

verified ·

1 Parent(s): 70f71c7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -72,16 +72,17 @@ def transcribe_audio(file_path):
         return "Reference not found.", "", "", "", "", "", ""
     model_ids = [
-        "openai/whisper-tiny",
-        "openai/whisper-tiny.en",
-        "openai/whisper-base",
-        "openai/whisper-base.en",
-        "openai/whisper-medium",
-        "openai/whisper-medium.en",
-        "distil-whisper/distil-large-v3.5",
-        "facebook/wav2vec2-base-960h",
-        "facebook/wav2vec2-large-960h",
-        "facebook/hubert-large-ls960-ft",
     ]
     outputs = {}
@@ -144,6 +145,7 @@ with gr.Blocks() as demo:
     distil_html = gr.HTML(label="Distil-Whisper Large")
     wav2vec_base_html = gr.HTML(label="Wav2Vec2 Base")
     wav2vec_large_html = gr.HTML(label="Wav2Vec2 Large")
     hubert_html = gr.HTML(label="HuBERT Large")
     transcribe_btn.click(
@@ -160,6 +162,7 @@ with gr.Blocks() as demo:
             distil_html,
             wav2vec_base_html,
             wav2vec_large_html,
             hubert_html,
         ],
     )

         return "Reference not found.", "", "", "", "", "", ""
     model_ids = [
+        "openai/whisper-tiny", # Smallest, multilingual
+        "openai/whisper-tiny.en", # Tiny, English-only
+        "openai/whisper-base", # Balanced, multilingual
+        "openai/whisper-base.en", # Base, English-only
+        "openai/whisper-medium", # Medium, multilingual
+        "openai/whisper-medium.en", # Medium, English-only
+        "distil-whisper/distil-large-v3.5", # Distilled from Whisper large, Faster & More accurate
+        "facebook/wav2vec2-base-960h", # Base model trained on 960h LibriSpeech (monolingual, English)
+        "facebook/wav2vec2-large-960h", #Larger model, better performance (monolingual, English)
+        "facebook/wav2vec2-large-960h-lv60-self", # Fine-tuned on 60k LibriLight hours
+        "facebook/hubert-large-ls960-ft", # Fine-tuned on LibriSpeech
     ]
     outputs = {}
     distil_html = gr.HTML(label="Distil-Whisper Large")
     wav2vec_base_html = gr.HTML(label="Wav2Vec2 Base")
     wav2vec_large_html = gr.HTML(label="Wav2Vec2 Large")
+    wav2vec_lv60_html = gr.HTML(label="Wav2Vec2 Large + LibriLight")
     hubert_html = gr.HTML(label="HuBERT Large")
     transcribe_btn.click(
             distil_html,
             wav2vec_base_html,
             wav2vec_large_html,
+            wav2vec_lv60_html,
             hubert_html,
         ],
     )