nb-whisper-demo

Running on T4

AngelinaZanardi committed on Oct 15, 2024

Commit

b8fdc42

verified ·

1 Parent(s): 76fe147

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,16 +36,14 @@ def pipe(file, return_timestamps=False,lang="nn"):
         device=device,
         token=auth_token,
         torch_dtype=torch.float16,
-        model_kwargs={"attn_implementation": "flash_attention_2", "num_beams": 5, "language": "nn"} if FLASH_ATTENTION else {"attn_implementation": "sdpa", "num_beams": 5},
     )
     asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
         language=lang,
         task="transcribe",
         no_timestamps=not return_timestamps,
     )
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    print("Supported languages:", tokenizer.lang_code_to_id.keys())
-    return asr(file, return_timestamps=return_timestamps, batch_size=24)
 def format_output(text):
     # Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."

         device=device,
         token=auth_token,
         torch_dtype=torch.float16,
+        model_kwargs={"attn_implementation": "flash_attention_2", "num_beams": 5, "language": lang} if FLASH_ATTENTION else {"attn_implementation": "sdpa", "num_beams": 5},
     )
     asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
         language=lang,
         task="transcribe",
         no_timestamps=not return_timestamps,
     )
+    return asr(file, return_timestamps=return_timestamps, batch_size=24, language=lang, task="transcribe")
 def format_output(text):
     # Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."