bcci committed on
Commit
0206ee8
·
verified ·
1 Parent(s): 911eefc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -0
app.py CHANGED
@@ -4,6 +4,9 @@ import torch
4
  import librosa
5
  import io
6
  import os
 
 
 
7
 
8
  app = FastAPI()
9
 
@@ -31,6 +34,16 @@ async def transcribe_audio(file: UploadFile = File(...)):
31
  audio_bytes = await file.read()
32
  audio_array, sampling_rate = librosa.load(io.BytesIO(audio_bytes), sr=processor.feature_extractor.sampling_rate)
33
 
 
 
 
 
 
 
 
 
 
 
34
  inputs = processor(
35
  audio_array,
36
  return_tensors="pt",
 
4
  import librosa
5
  import io
6
  import os
7
+ from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
8
+ model = load_silero_vad()
9
+
10
 
11
  app = FastAPI()
12
 
 
34
  audio_bytes = await file.read()
35
  audio_array, sampling_rate = librosa.load(io.BytesIO(audio_bytes), sr=processor.feature_extractor.sampling_rate)
36
 
37
+
38
+
39
+ speech_timestamps = get_speech_timestamps(
40
+ torch.from_numpy(audio_array),
41
+ model,
42
+ return_seconds=True, # Return speech timestamps in seconds (default is samples)
43
+ )
44
+
45
+ print(speech_timestamps)
46
+
47
  inputs = processor(
48
  audio_array,
49
  return_tensors="pt",