Banafo committed on
Commit
bd53ed8
·
verified ·
1 Parent(s): 5150994

Update app.py

Browse files

Normalize the audio volume from the microphone

Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -142,7 +142,14 @@ def transcribe_microphone_stream(audio_chunk, stream_state, language):
142
  sample_rate, waveform_np = audio_chunk
143
  if len(waveform_np.shape) > 1:
144
  waveform_np = waveform_np.mean(axis=1)
145
-
 
 
 
 
 
 
 
146
  # Resample if needed
147
  if sample_rate != 16000:
148
  waveform = torch.from_numpy(waveform_np).float().unsqueeze(0)
@@ -151,6 +158,8 @@ def transcribe_microphone_stream(audio_chunk, stream_state, language):
151
  waveform_np = waveform.squeeze(0).numpy()
152
  sample_rate = 16000
153
 
 
 
154
  # Initialize stream if first chunk
155
  if stream_state is None:
156
  stream_state = recognizer.create_stream()
 
142
  sample_rate, waveform_np = audio_chunk
143
  if len(waveform_np.shape) > 1:
144
  waveform_np = waveform_np.mean(axis=1)
145
+
146
+ # Normalize if needed
147
+ if waveform_np.dtype != np.float32:
148
+ waveform_np = waveform_np.astype(np.float32)
149
+
150
+ if np.max(np.abs(waveform_np)) > 1.0:
151
+ waveform_np = waveform_np / np.max(np.abs(waveform_np))
152
+
153
  # Resample if needed
154
  if sample_rate != 16000:
155
  waveform = torch.from_numpy(waveform_np).float().unsqueeze(0)
 
158
  waveform_np = waveform.squeeze(0).numpy()
159
  sample_rate = 16000
160
 
161
+ waveform_np = np.clip(waveform_np, -1.0, 1.0)
162
+
163
  # Initialize stream if first chunk
164
  if stream_state is None:
165
  stream_state = recognizer.create_stream()