Update app.py
app.py
CHANGED
@@ -11,6 +11,12 @@ VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "s
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
+        if not api_key:
+            return "Please enter your OpenAI API key first.", None
+
+        if not text_prompt:
+            return "Please enter a text prompt first.", None
+
         # Initialize OpenAI client with the provided API key
         client = OpenAI(api_key=api_key)
 
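The guards added above return before any client is created. For context, the request that produces `completion` in the next hunk sits in the lines the diff skips (roughly 23-32 of the new file); below is a minimal sketch of such a call against the audio-capable chat completions API, where the model name and message layout are assumptions rather than what app.py actually contains:

    # Sketch only: model name, messages, and parameters are assumed, since the
    # diff does not show this part of process_text_input. `client`,
    # `selected_voice`, and `text_prompt` come from the function above.
    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",              # assumed audio-capable model
        modalities=["text", "audio"],              # ask for a transcript and audio
        audio={"voice": selected_voice, "format": "wav"},
        messages=[{"role": "user", "content": text_prompt}],
    )

Requesting `audio={"voice": ..., "format": "wav"}` is what makes `completion.choices[0].message.audio.data`, used in the next hunk, a base64-encoded WAV payload.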
@@ -27,13 +33,22 @@ def process_text_input(api_key, text_prompt, selected_voice):
         )
 
         # Save the audio to a temporary file
+        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+            return "No audio response was generated.", None
+
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)
 
-        # Get the text response
-        text_response =
+        # Get the text response - with proper error handling
+        text_response = "No text response available."
+        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                if hasattr(completion.choices[0].message.content[0], 'text'):
+                    text_response = completion.choices[0].message.content[0].text
+            elif isinstance(completion.choices[0].message.content, str):
+                text_response = completion.choices[0].message.content
 
         return text_response, temp_path
     except Exception as e:
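A side note on the unchanged lines in this hunk: `tempfile.mktemp()` only reserves a name and is documented as deprecated, because another process can claim the file between choosing the name and calling `open()`. A sketch of an equivalent, race-free variant (not part of this commit):

    import base64
    import tempfile

    # Let the OS create the file atomically instead of guessing a free name.
    wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(wav_bytes)
    temp_path = f.name  # pass this path to the gr.Audio output as before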
@@ -42,6 +57,9 @@ def process_text_input(api_key, text_prompt, selected_voice):
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
+        if not api_key:
+            return "Please enter your OpenAI API key first.", None
+
         if not audio_path:
             return "Please upload or record audio first.", None
 
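The new api_key guard mirrors the one in process_text_input. The lines the diff skips next (roughly 66-101) are where the uploaded recording is sent to the model; below is a hedged sketch of the usual pattern for that, with the model name, default prompt text, and parameter choices all being assumptions rather than what app.py actually does:

    import base64

    # Read the recording Gradio saved to disk and base64-encode it.
    # `client`, `audio_path`, `selected_voice`, `text_prompt` come from the
    # function above; everything else here is illustrative.
    with open(audio_path, "rb") as f:
        encoded_audio = base64.b64encode(f.read()).decode("utf-8")

    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",              # assumed
        modalities=["text", "audio"],
        audio={"voice": selected_voice, "format": "wav"},
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": text_prompt or "Please respond to this audio."},
                {"type": "input_audio",
                 "input_audio": {"data": encoded_audio, "format": "wav"}},
            ],
        }],
    )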
@@ -84,13 +102,22 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
         )
 
         # Save the audio response
+        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+            return "No audio response was generated.", None
+
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)
 
-        # Get the text response
-        text_response =
+        # Get the text response - with proper error handling
+        text_response = "No text response available."
+        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                if hasattr(completion.choices[0].message.content[0], 'text'):
+                    text_response = completion.choices[0].message.content[0].text
+            elif isinstance(completion.choices[0].message.content, str):
+                text_response = completion.choices[0].message.content
 
         return text_response, temp_path
     except Exception as e:
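The content-parsing block added here is identical to the one in process_text_input, and it appears a third time in the sample handler below. A small helper, not part of the commit and with an illustrative name, could hold that logic once:

    def extract_text_response(message, default="No text response available."):
        """Return the assistant text from a chat completion message, else a default."""
        content = getattr(message, "content", None)
        if isinstance(content, list) and content and hasattr(content[0], "text"):
            return content[0].text
        if isinstance(content, str) and content:
            return content
        return default

Each call site would then reduce to `text_response = extract_text_response(completion.choices[0].message)`.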
@@ -110,11 +137,14 @@ def download_example_audio():
 
         return temp_path
     except Exception as e:
+        print(f"Error downloading example audio: {str(e)}")
         return None
 
 def use_example_audio():
     """Load example audio for the interface"""
     audio_path = download_example_audio()
+    if not audio_path:
+        return None
     return audio_path
 
 # Create Gradio Interface
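Two small observations on this hunk: the print statement surfaces the error in the Space's container logs, and the new `if not audio_path: return None` is equivalent to simply returning `audio_path`, which is already None in that case. A sketch of the same intent expressed with the logging module (again, not what the commit does):

    import logging

    logger = logging.getLogger(__name__)

    def use_example_audio():
        """Load example audio for the interface (behaviour unchanged from the commit)."""
        audio_path = download_example_audio()
        if audio_path is None:
            logger.warning("Example audio could not be downloaded.")
        return audio_path  # None simply leaves the gr.Audio input empty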
@@ -214,13 +244,22 @@ with gr.Blocks(title="VoxTalk") as app:
             )
 
             # Save the audio to a temporary file
+            if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+                return f"No audio response was generated for {voice_type} voice.", None
+
             wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
             temp_path = tempfile.mktemp(suffix=".wav")
             with open(temp_path, "wb") as f:
                 f.write(wav_bytes)
 
-            # Get the text response
-            text_response =
+            # Get the text response - with proper error handling
+            text_response = "Sample generated"
+            if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+                if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                    if hasattr(completion.choices[0].message.content[0], 'text'):
+                        text_response = completion.choices[0].message.content[0].text
+                elif isinstance(completion.choices[0].message.content, str):
+                    text_response = completion.choices[0].message.content
 
             return f"Sample: {text_response}", temp_path
         except Exception as e:
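This last hunk lives inside the `with gr.Blocks(title="VoxTalk") as app:` block, in the handler that plays a voice sample. For orientation, this is how such a handler is typically wired to the UI with Gradio's event API; the component names, labels, and the handler's name and signature are placeholders, since the diff does not show them:

    # Illustrative wiring only; the real components and handler in app.py
    # are not visible in this diff. This sits inside the gr.Blocks block.
    api_key_box = gr.Textbox(label="OpenAI API Key", type="password")
    voice_dropdown = gr.Dropdown(choices=VOICES, value="alloy", label="Voice")
    sample_button = gr.Button("Play voice sample")
    sample_text = gr.Textbox(label="Sample transcript")
    sample_audio = gr.Audio(label="Sample audio", type="filepath")

    sample_button.click(
        fn=generate_voice_sample,          # placeholder name for the handler above
        inputs=[api_key_box, voice_dropdown],
        outputs=[sample_text, sample_audio],
    )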