Spaces:
Running
on
Zero
Running
on
Zero
from pathlib import Path | |
import torch | |
from faster_whisper import WhisperModel | |
from src.data.chapters import sec_to_hms | |
# Pick the inference device once at import time: use the GPU when CUDA is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
class ASRProcessor:
    """Automatic Speech Recognition processor backed by faster-whisper.

    Transcribes an audio file and returns the transcript as plain text, one
    line per segment, each line prefixed with the segment's start time as
    formatted by ``sec_to_hms``.
    """

    def __init__(self, model_name: str = "large-v2", compute_type: str = "float16"):
        """Load the Whisper model onto the module-level ``device``.

        Args:
            model_name: Model identifier passed to ``WhisperModel``.
            compute_type: Compute type passed to ``WhisperModel``. If the
                module selected the CPU and ``"float16"`` is requested
                (including via the default), ``"float32"`` is used instead.
        """
        # The module falls back to the CPU when CUDA is unavailable, but the
        # CTranslate2 backend rejects float16 on CPU. Without this guard the
        # default arguments could never construct a model on a CPU-only host.
        if device == "cpu" and compute_type == "float16":
            compute_type = "float32"

        self.model_name = model_name
        self.model = WhisperModel(model_name, device=device, compute_type=compute_type)

    def get_asr(self, audio_file, return_duration: bool = True):
        """Transcribe ``audio_file`` into timestamped text lines.

        Args:
            audio_file: Path to the audio file; it must exist on disk.
            return_duration: When true, also return ``info.duration`` as
                reported by the model's ``transcribe`` call.

        Returns:
            The transcript string, with one ``"<start>: <text>"`` line per
            segment and a trailing newline. When ``return_duration`` is true,
            a ``(transcript, duration)`` tuple is returned instead.

        Raises:
            AssertionError: If ``audio_file`` does not exist.
        """
        if not Path(audio_file).exists():
            # Raised explicitly rather than via ``assert`` so the check still
            # runs under ``python -O``; the exception type and message are
            # kept so existing callers see the same failure.
            raise AssertionError(f"File {audio_file} does not exist")

        segments, info = self.model.transcribe(
            audio_file, length_penalty=0.5, condition_on_previous_text=False
        )

        lines = [
            f"{sec_to_hms(segment.start)}: {segment.text.strip()}"
            for segment in segments
        ]
        transcript = "\n".join(lines) + "\n"

        if return_duration:
            return transcript, info.duration
        return transcript