Update app.py
app.py CHANGED

@@ -14,7 +14,7 @@ import re
 import scipy.io.wavfile
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
-model_id = "openai/whisper-
+model_id = "openai/whisper-large-v3"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     model_id, low_cpu_mem_usage=True, use_safetensors=True
@@ -31,7 +31,7 @@ pipe = pipeline(
     max_new_tokens=128,
     chunk_length_s=30,
     batch_size=8,
-
+    device=device,
 )
 
 
@@ -39,24 +39,24 @@ arabic_bad_Words = pd.read_csv("arabic_bad_words_dataset.csv")
 english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
 
 
-def load_audio(file: str, sr: int = 16000):
-    try:
-        # This reads the audio from the video file without creating a separate audio file
-        command = [
-            "ffmpeg",
-            "-i", file,
-            "-f", "s16le",
-            "-acodec", "pcm_s16le",
-            "-ar", str(sr),
-            "-ac", "1",
-            "-"
-        ]
+# def load_audio(file: str, sr: int = 16000):
+#     try:
+#         # This reads the audio from the video file without creating a separate audio file
+#         command = [
+#             "ffmpeg",
+#             "-i", file,
+#             "-f", "s16le",
+#             "-acodec", "pcm_s16le",
+#             "-ar", str(sr),
+#             "-ac", "1",
+#             "-"
+#         ]
 
-        out = subprocess.run(command, capture_output=True, check=True).stdout
-    except subprocess.CalledProcessError as e:
-        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+#         out = subprocess.run(command, capture_output=True, check=True).stdout
+#     except subprocess.CalledProcessError as e:
+#         raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
 
-    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+#     return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
 
 def clean_english_word(word):
     cleaned_text = re.sub(r'^[\s\W_]+|[\s\W_]+$', '', word)
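For context, a minimal sketch of how the setup reads after this commit. Only model_id, the from_pretrained kwargs, max_new_tokens, chunk_length_s, batch_size, and the new device=device argument appear in the diff above; the AutoProcessor wiring, the "automatic-speech-recognition" task string, and the sample.wav call are assumptions about the surrounding app.py, not part of this change.

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_id = "openai/whisper-large-v3"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, low_cpu_mem_usage=True, use_safetensors=True
)
processor = AutoProcessor.from_pretrained(model_id)  # assumed; not shown in this diff

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=8,
    device=device,  # added in this commit so inference runs on the GPU when available
)

# The pipeline decodes file inputs through ffmpeg internally, which is consistent
# with the manual ffmpeg-based load_audio() helper being commented out above.
result = pipe("sample.wav")  # hypothetical input path
print(result["text"])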