Spaces:

Athspi
/

Whshhs

Runtime error

App Files Community

Athspi committed on Mar 29

Commit

c02bb52

verified ·

1 Parent(s): f3ad9e5

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -8

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import io
 # Configuration
 SAMPLE_RATE = 24000
-MODEL = "gemini-2.0-flash-exp"  # Correct model name
 class GeminiTTS:
     def __init__(self, api_key):
@@ -35,22 +35,20 @@ class GeminiTTS:
                 async for response in session.receive():
                     if audio_data := response.data:
-                        # Convert to numpy array and normalize
                         audio_array = np.frombuffer(audio_data, dtype=np.float32)
                         # Handle empty/quiet audio
                         if audio_array.size == 0:
                             audio_array = np.zeros(int(SAMPLE_RATE * 0.5))  # 0.5s of silence
-                        # Normalize audio
                         max_val = np.max(np.abs(audio_array))
                         if max_val > 0:
                             audio_array = audio_array / max_val
-                        # Convert to WAV bytes for Gradio
-                        with io.BytesIO() as wav_buffer:
-                            sf.write(wav_buffer, audio_array, SAMPLE_RATE, format='WAV')
-                            return (SAMPLE_RATE, wav_buffer.getvalue())
                     if text_response := response.text:
                         return text_response
@@ -59,6 +57,26 @@ class GeminiTTS:
         except Exception as e:
             return f"Error: {str(e)}"
 def create_interface():
     tts_engine = None
@@ -104,7 +122,7 @@ def create_interface():
             )
             generate_btn = gr.Button("Generate Speech")
-        audio_output = gr.Audio(label="Output Audio")
         text_output = gr.Textbox(label="Messages", interactive=False)
         generate_btn.click(

 # Configuration
 SAMPLE_RATE = 24000
+MODEL = "gemini-2.0-flash-exp"  # Correct experimental model name
 class GeminiTTS:
     def __init__(self, api_key):
                 async for response in session.receive():
                     if audio_data := response.data:
+                        # Convert to numpy array
                         audio_array = np.frombuffer(audio_data, dtype=np.float32)
                         # Handle empty/quiet audio
                         if audio_array.size == 0:
                             audio_array = np.zeros(int(SAMPLE_RATE * 0.5))  # 0.5s of silence
+                        # Normalize audio to prevent processing warnings
                         max_val = np.max(np.abs(audio_array))
                         if max_val > 0:
                             audio_array = audio_array / max_val
+                        # Convert to proper format for Gradio
+                        return self._create_audio_response(audio_array)
                     if text_response := response.text:
                         return text_response
         except Exception as e:
             return f"Error: {str(e)}"
+    def _create_audio_response(self, audio_array):
+        """Create properly formatted audio response for Gradio"""
+        # Scale float samples to the signed 16-bit range and cast to int16 (assumes values lie in [-1, 1]; the visible caller peak-normalizes first)
+        audio_array = (audio_array * 32767).astype(np.int16)
+        # Encode the samples as a mono PCM_16 WAV at SAMPLE_RATE into an in-memory buffer
+        with io.BytesIO() as wav_buffer:
+            with sf.SoundFile(
+                wav_buffer,
+                mode='w',
+                samplerate=SAMPLE_RATE,
+                channels=1,
+                format='WAV',
+                subtype='PCM_16'
+            ) as sf_file:
+                sf_file.write(audio_array)
+            wav_bytes = wav_buffer.getvalue()  # taken after the SoundFile context exits, before the BytesIO is closed
+        return (SAMPLE_RATE, wav_bytes)  # NOTE(review): tuple carries encoded WAV bytes, not a sample array or file path — confirm gr.Audio accepts this
 def create_interface():
     tts_engine = None
             )
             generate_btn = gr.Button("Generate Speech")
+        audio_output = gr.Audio(label="Output Audio", type="filepath")
         text_output = gr.Textbox(label="Messages", interactive=False)
         generate_btn.click(