Ritesh-hf committed on
Commit
d6c2014
·
verified ·
1 Parent(s): 0d3b49f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -18
app.py CHANGED
@@ -14,7 +14,7 @@ import re
14
  import scipy.io.wavfile
15
 
16
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
17
- model_id = "openai/whisper-tiny"
18
 
19
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
20
  model_id, low_cpu_mem_usage=True, use_safetensors=True
@@ -31,7 +31,7 @@ pipe = pipeline(
31
  max_new_tokens=128,
32
  chunk_length_s=30,
33
  batch_size=8,
34
- # device=device,
35
  )
36
 
37
 
@@ -39,24 +39,24 @@ arabic_bad_Words = pd.read_csv("arabic_bad_words_dataset.csv")
39
  english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
40
 
41
 
42
- def load_audio(file: str, sr: int = 16000):
43
- try:
44
- # This reads the audio from the video file without creating a separate audio file
45
- command = [
46
- "ffmpeg",
47
- "-i", file,
48
- "-f", "s16le",
49
- "-acodec", "pcm_s16le",
50
- "-ar", str(sr),
51
- "-ac", "1",
52
- "-"
53
- ]
54
 
55
- out = subprocess.run(command, capture_output=True, check=True).stdout
56
- except subprocess.CalledProcessError as e:
57
- raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
58
 
59
- return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
60
 
61
  def clean_english_word(word):
62
  cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word)
 
14
  import scipy.io.wavfile
15
 
16
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
17
+ model_id = "openai/whisper-large-v3"
18
 
19
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
20
  model_id, low_cpu_mem_usage=True, use_safetensors=True
 
31
  max_new_tokens=128,
32
  chunk_length_s=30,
33
  batch_size=8,
34
+ device=device,
35
  )
36
 
37
 
 
39
  english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
40
 
41
 
42
+ # def load_audio(file: str, sr: int = 16000):
43
+ # try:
44
+ # # This reads the audio from the video file without creating a separate audio file
45
+ # command = [
46
+ # "ffmpeg",
47
+ # "-i", file,
48
+ # "-f", "s16le",
49
+ # "-acodec", "pcm_s16le",
50
+ # "-ar", str(sr),
51
+ # "-ac", "1",
52
+ # "-"
53
+ # ]
54
 
55
+ # out = subprocess.run(command, capture_output=True, check=True).stdout
56
+ # except subprocess.CalledProcessError as e:
57
+ # raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
58
 
59
+ # return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
60
 
61
  def clean_english_word(word):
62
  cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word)