Update app.py
app.py
CHANGED
@@ -11,6 +11,12 @@ VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "s
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
+        if not api_key:
+            return "Please enter your OpenAI API key first.", None
+
+        if not text_prompt:
+            return "Please enter a text prompt first.", None
+
         # Initialize OpenAI client with the provided API key
         client = OpenAI(api_key=api_key)
 
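The guards added above return before any client is created. For context, the request that produces `completion` in the next hunk sits in the lines the diff skips (roughly 23-32 of the new file); below is a minimal sketch of such a call against the audio-capable chat completions API, where the model name and message layout are assumptions rather than what app.py actually contains:

    # Sketch only: model name, messages, and parameters are assumed, since the
    # diff does not show this part of process_text_input. `client`,
    # `selected_voice`, and `text_prompt` come from the function above.
    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",              # assumed audio-capable model
        modalities=["text", "audio"],              # ask for a transcript and audio
        audio={"voice": selected_voice, "format": "wav"},
        messages=[{"role": "user", "content": text_prompt}],
    )

Requesting `audio={"voice": ..., "format": "wav"}` is what makes `completion.choices[0].message.audio.data`, used in the next hunk, a base64-encoded WAV payload.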
@@ -27,13 +33,22 @@ def process_text_input(api_key, text_prompt, selected_voice):
         )
 
         # Save the audio to a temporary file
+        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+            return "No audio response was generated.", None
+
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)
 
-        # Get the text response
-        text_response =
+        # Get the text response - with proper error handling
+        text_response = "No text response available."
+        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                if hasattr(completion.choices[0].message.content[0], 'text'):
+                    text_response = completion.choices[0].message.content[0].text
+            elif isinstance(completion.choices[0].message.content, str):
+                text_response = completion.choices[0].message.content
 
         return text_response, temp_path
     except Exception as e:
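A side note on the unchanged lines in this hunk: `tempfile.mktemp()` only reserves a name and is documented as deprecated, because another process can claim the file between choosing the name and calling `open()`. A sketch of an equivalent, race-free variant (not part of this commit):

    import base64
    import tempfile

    # Let the OS create the file atomically instead of guessing a free name.
    wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(wav_bytes)
    temp_path = f.name  # pass this path to the gr.Audio output as before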
@@ -42,6 +57,9 @@ def process_text_input(api_key, text_prompt, selected_voice):
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
+        if not api_key:
+            return "Please enter your OpenAI API key first.", None
+
         if not audio_path:
             return "Please upload or record audio first.", None
 
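The new api_key guard mirrors the one in process_text_input. The lines the diff skips next (roughly 66-101) are where the uploaded recording is sent to the model; below is a hedged sketch of the usual pattern for that, with the model name, default prompt text, and parameter choices all being assumptions rather than what app.py actually does:

    import base64

    # Read the recording Gradio saved to disk and base64-encode it.
    # `client`, `audio_path`, `selected_voice`, `text_prompt` come from the
    # function above; everything else here is illustrative.
    with open(audio_path, "rb") as f:
        encoded_audio = base64.b64encode(f.read()).decode("utf-8")

    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",              # assumed
        modalities=["text", "audio"],
        audio={"voice": selected_voice, "format": "wav"},
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": text_prompt or "Please respond to this audio."},
                {"type": "input_audio",
                 "input_audio": {"data": encoded_audio, "format": "wav"}},
            ],
        }],
    )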
@@ -84,13 +102,22 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
         )
 
         # Save the audio response
+        if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+            return "No audio response was generated.", None
+
         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
         temp_path = tempfile.mktemp(suffix=".wav")
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)
 
-        # Get the text response
-        text_response =
+        # Get the text response - with proper error handling
+        text_response = "No text response available."
+        if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+            if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                if hasattr(completion.choices[0].message.content[0], 'text'):
+                    text_response = completion.choices[0].message.content[0].text
+            elif isinstance(completion.choices[0].message.content, str):
+                text_response = completion.choices[0].message.content
 
         return text_response, temp_path
     except Exception as e:
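The content-parsing block added here is identical to the one in process_text_input, and it appears a third time in the sample handler below. A small helper, not part of the commit and with an illustrative name, could hold that logic once:

    def extract_text_response(message, default="No text response available."):
        """Return the assistant text from a chat completion message, else a default."""
        content = getattr(message, "content", None)
        if isinstance(content, list) and content and hasattr(content[0], "text"):
            return content[0].text
        if isinstance(content, str) and content:
            return content
        return default

Each call site would then reduce to `text_response = extract_text_response(completion.choices[0].message)`.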
@@ -110,11 +137,14 @@ def download_example_audio():
 
         return temp_path
     except Exception as e:
+        print(f"Error downloading example audio: {str(e)}")
         return None
 
 def use_example_audio():
     """Load example audio for the interface"""
     audio_path = download_example_audio()
+    if not audio_path:
+        return None
     return audio_path
 
 # Create Gradio Interface
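Two small observations on this hunk: the print statement surfaces the error in the Space's container logs, and the new `if not audio_path: return None` is equivalent to simply returning `audio_path`, which is already None in that case. A sketch of the same intent expressed with the logging module (again, not what the commit does):

    import logging

    logger = logging.getLogger(__name__)

    def use_example_audio():
        """Load example audio for the interface (behaviour unchanged from the commit)."""
        audio_path = download_example_audio()
        if audio_path is None:
            logger.warning("Example audio could not be downloaded.")
        return audio_path  # None simply leaves the gr.Audio input empty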
@@ -214,13 +244,22 @@ with gr.Blocks(title="VoxTalk") as app:
             )
 
             # Save the audio to a temporary file
+            if not hasattr(completion.choices[0].message, 'audio') or not completion.choices[0].message.audio:
+                return f"No audio response was generated for {voice_type} voice.", None
+
             wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
             temp_path = tempfile.mktemp(suffix=".wav")
             with open(temp_path, "wb") as f:
                 f.write(wav_bytes)
 
-            # Get the text response
-            text_response =
+            # Get the text response - with proper error handling
+            text_response = "Sample generated"
+            if hasattr(completion.choices[0].message, 'content') and completion.choices[0].message.content:
+                if isinstance(completion.choices[0].message.content, list) and len(completion.choices[0].message.content) > 0:
+                    if hasattr(completion.choices[0].message.content[0], 'text'):
+                        text_response = completion.choices[0].message.content[0].text
+                elif isinstance(completion.choices[0].message.content, str):
+                    text_response = completion.choices[0].message.content
 
             return f"Sample: {text_response}", temp_path
         except Exception as e:
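This last hunk lives inside the `with gr.Blocks(title="VoxTalk") as app:` block, in the handler that plays a voice sample. For orientation, this is how such a handler is typically wired to the UI with Gradio's event API; the component names, labels, and the handler's name and signature are placeholders, since the diff does not show them:

    # Illustrative wiring only; the real components and handler in app.py
    # are not visible in this diff. This sits inside the gr.Blocks block.
    api_key_box = gr.Textbox(label="OpenAI API Key", type="password")
    voice_dropdown = gr.Dropdown(choices=VOICES, value="alloy", label="Voice")
    sample_button = gr.Button("Play voice sample")
    sample_text = gr.Textbox(label="Sample transcript")
    sample_audio = gr.Audio(label="Sample audio", type="filepath")

    sample_button.click(
        fn=generate_voice_sample,          # placeholder name for the handler above
        inputs=[api_key_box, voice_dropdown],
        outputs=[sample_text, sample_audio],
    )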