Update app.py: add Silero VAD speech-timestamp detection to transcribe_audio
Browse files
app.py
CHANGED
@@ -4,6 +4,9 @@ import torch
|
|
4 |
import librosa
|
5 |
import io
|
6 |
import os
|
|
|
|
|
|
|
7 |
|
8 |
app = FastAPI()
|
9 |
|
@@ -31,6 +34,16 @@ async def transcribe_audio(file: UploadFile = File(...)):
|
|
31 |
audio_bytes = await file.read()
|
32 |
audio_array, sampling_rate = librosa.load(io.BytesIO(audio_bytes), sr=processor.feature_extractor.sampling_rate)
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
inputs = processor(
|
35 |
audio_array,
|
36 |
return_tensors="pt",
|
|
|
4 |
import librosa
|
5 |
import io
|
6 |
import os
|
7 |
+
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
|
8 |
+
model = load_silero_vad()
|
9 |
+
|
10 |
|
11 |
app = FastAPI()
|
12 |
|
|
|
34 |
audio_bytes = await file.read()
|
35 |
audio_array, sampling_rate = librosa.load(io.BytesIO(audio_bytes), sr=processor.feature_extractor.sampling_rate)
|
36 |
|
37 |
+
|
38 |
+
|
39 |
+
speech_timestamps = get_speech_timestamps(
|
40 |
+
torch.from_numpy(audio_array),
|
41 |
+
model,
|
42 |
+
return_seconds=True, # Return speech timestamps in seconds (default is samples)
|
43 |
+
)
|
44 |
+
|
45 |
+
print(speech_timestamps)
|
46 |
+
|
47 |
inputs = processor(
|
48 |
audio_array,
|
49 |
return_tensors="pt",
|