shukdevdatta123 committed
Commit 0a8ed36 · verified · 1 Parent(s): 30eacd3

Update app.py

Files changed (1)
  1. app.py +8 -50
app.py CHANGED
@@ -11,12 +11,6 @@ VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "s
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
-        if not api_key:
-            return "Please enter your OpenAI API key first.", None
-
-        if not text_prompt:
-            return "Please enter a text prompt first.", None
-
         # Initialize OpenAI client with the provided API key
         client = OpenAI(api_key=api_key)

@@ -33,22 +27,13 @@ def process_text_input(api_key, text_prompt, selected_voice):
         )

         # Save the audio to a temporary file
-        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
-            return "No audio response was generated.", None
-
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)

-        # Get the text response - with proper error handling
-        text_response = "No text response available."
-        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
-            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
-                if hasattr(completion.choices[0].message.content[0], 'text'):
-                    text_response = completion.choices[0].message.content[0].text
-            elif isinstance(completion.choices[0].message.content, str):
-                text_response = completion.choices[0].message.content
+        # Get the text response
+        text_response = completion.choices[0].message.content

         return text_response, temp_path
     except Exception as e:
@@ -57,9 +42,6 @@ def process_text_input(api_key, text_prompt, selected_voice):
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
-        if not api_key:
-            return "Please enter your OpenAI API key first.", None
-
         if not audio_path:
             return "Please upload or record audio first.", None

@@ -102,22 +84,13 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
         )

         # Save the audio response
-        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
-            return "No audio response was generated.", None
-
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)

-        # Get the text response - with proper error handling
-        text_response = "No text response available."
-        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
-            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
-                if hasattr(completion.choices[0].message.content[0], 'text'):
-                    text_response = completion.choices[0].message.content[0].text
-            elif isinstance(completion.choices[0].message.content, str):
-                text_response = completion.choices[0].message.content
+        # Get the text response
+        text_response = completion.choices[0].message.content

         return text_response, temp_path
     except Exception as e:
@@ -137,20 +110,17 @@ def download_example_audio():

         return temp_path
     except Exception as e:
-        print(f"Error downloading example audio: {str(e)}")
         return None

 def use_example_audio():
     """Load example audio for the interface"""
     audio_path = download_example_audio()
-    if not audio_path:
-        return None
     return audio_path

 # Create Gradio Interface
-with gr.Blocks(title="VoxTalk") as app:
-    gr.Markdown("# VoxTalk")
-    gr.Markdown("*Voice-to-voice AI interaction platform*")
+with gr.Blocks(title="OpenAI Audio Chat App") as app:
+    gr.Markdown("# OpenAI Audio Chat App")
+    gr.Markdown("Interact with GPT-4o audio model through text and audio inputs")

     # API Key input (used across all tabs)
     api_key = gr.Textbox(
@@ -244,24 +214,12 @@ with gr.Blocks(title="VoxTalk") as app:
             )

             # Save the audio to a temporary file
-            if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
-                return f"No audio response was generated for {voice_type} voice.", None
-
             wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
             temp_path = tempfile.mktemp(suffix=".wav")
             with open(temp_path, "wb") as f:
                 f.write(wav_bytes)

-            # Get the text response - with proper error handling
-            text_response = "Sample generated"
-            if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
-                if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
-                    if hasattr(completion.choices[0].message.content[0], 'text'):
-                        text_response = completion.choices[0].message.content[0].text
-                elif isinstance(completion.choices[0].message.content, str):
-                    text_response = completion.choices[0].message.content
-
-            return f"Sample: {text_response}", temp_path
+            return f"Sample generated with voice: {voice_type}", temp_path
         except Exception as e:
             return f"Error: {str(e)}", None
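Note: the simplified `text_response = completion.choices[0].message.content` assumes the chat completion returns its text reply in `content`. The `client.chat.completions.create(...)` call itself sits outside the changed lines, so the sketch below is only an assumption about how app.py builds the request; the model name, prompt, and voice are illustrative placeholders, not taken from the file. It shows where `message.audio.data` (the field the app decodes) comes from, and that with audio output enabled the text may also arrive as `message.audio.transcript`.

```python
import base64
from openai import OpenAI

# Hypothetical request shape; model, prompt, and voice are placeholders.
client = OpenAI(api_key="sk-...")  # the app passes the key from the Gradio textbox

completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",               # assumed audio-capable model
    modalities=["text", "audio"],               # request both a text and an audio reply
    audio={"voice": "alloy", "format": "wav"},  # one of the VOICES options
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)

message = completion.choices[0].message
wav_bytes = base64.b64decode(message.audio.data)  # same field app.py decodes
# Depending on the model/SDK version, the text reply may be in `content` or only
# in the audio transcript, so a fallback keeps the return value non-empty.
text_response = message.content or message.audio.transcript
```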
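A separate detail in the unchanged context lines: `tempfile.mktemp()` only returns a name and is documented as deprecated in the Python standard library. A minimal alternative sketch, assuming the only requirement is a `.wav` path that Gradio can play back, uses `NamedTemporaryFile(delete=False)`:

```python
import base64
import tempfile

# Placeholder bytes standing in for the decoded message.audio.data payload.
wav_bytes = base64.b64decode(b"UklGRiQAAABXQVZF")

# NamedTemporaryFile creates the file itself; delete=False keeps it on disk so
# the returned path can be handed to a gr.Audio output component.
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
    f.write(wav_bytes)
    temp_path = f.name

print(temp_path)  # path to return to the interface
```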