Spaces:

lucas-ventura
/

chapter-llama

Running on Zero

chapter-llama / tools /extract /asr_faster_whisper.py

Create asr_faster_whisper.py

4771930 verified 26 days ago

1.2 kB

	from pathlib import Path

	import torch
	from faster_whisper import WhisperModel

	from src.data.chapters import sec_to_hms

	# Pick the inference device once at import time: use CUDA when torch
	# reports an available GPU, otherwise run on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"


	class ASRProcessor:
	    """
	    Automatic Speech Recognition processor backed by faster-whisper.

	    Loads a ``WhisperModel`` once and transcribes audio files into a
	    newline-separated transcript in which every segment is prefixed with
	    its start time formatted by ``sec_to_hms``.
	    """

	    # Explicit half-precision compute types are rejected by CTranslate2 on
	    # CPU (see the CTranslate2 quantization docs); map them to the closest
	    # type that a CPU backend accepts so the CPU fallback actually works.
	    _CPU_COMPUTE_FALLBACK = {"float16": "float32", "int8_float16": "int8"}

	    def __init__(self, model_name="large-v2", compute_type="float16"):
	        """
	        Load the Whisper model on the module-level ``device``.

	        Args:
	            model_name: Model size or path passed to ``WhisperModel``.
	            compute_type: Compute type passed to ``WhisperModel``. When the
	                module selected the CPU, half-precision requests are
	                replaced by a CPU-compatible type instead of failing.
	        """
	        if device == "cpu":
	            compute_type = self._CPU_COMPUTE_FALLBACK.get(compute_type, compute_type)

	        self.model_name = model_name
	        self.model = WhisperModel(model_name, device=device, compute_type=compute_type)

	    def get_asr(self, audio_file, return_duration=True):
	        """
	        Transcribe ``audio_file`` into ``"<start>: <text>"`` lines.

	        Args:
	            audio_file: Path to the audio file to transcribe.
	            return_duration: When true, also return ``info.duration`` as
	                reported by the model's ``transcribe`` call.

	        Returns:
	            The transcript string (one line per segment, always terminated
	            by a newline), or a ``(transcript, duration)`` tuple when
	            ``return_duration`` is true.

	        Raises:
	            FileNotFoundError: If ``audio_file`` does not exist.
	        """
	        # Raise explicitly instead of using ``assert``: assertions are
	        # removed when Python runs with -O, which would skip this check.
	        if not Path(audio_file).exists():
	            raise FileNotFoundError(f"File {audio_file} does not exist")

	        segments, info = self.model.transcribe(
	            audio_file, length_penalty=0.5, condition_on_previous_text=False
	        )

	        # ``segments`` is consumed exactly once here, in order.
	        lines = [
	            f"{sec_to_hms(segment.start)}: {segment.text.strip()}"
	            for segment in segments
	        ]
	        transcript = "\n".join(lines) + "\n"

	        if return_duration:
	            return transcript, info.duration
	        return transcript