shukdevdatta123 committed
Commit e2170c7 · verified · 1 Parent(s): 0a8ed36

Update app.py

Files changed (1):
  1. app.py +65 -10
app.py CHANGED
@@ -32,7 +32,7 @@ def process_text_input(api_key, text_prompt, selected_voice):
         with open(temp_path, "wb") as f:
             f.write(wav_bytes)
 
-        # Get the text response
+        # Get the text response directly from the API
         text_response = completion.choices[0].message.content
 
         return text_response, temp_path
@@ -96,6 +96,24 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     except Exception as e:
         return f"Error: {str(e)}", None
 
+def transcribe_audio(api_key, audio_path):
+    """Transcribe an audio file using OpenAI's API"""
+    try:
+        if not audio_path:
+            return "No audio file provided for transcription."
+
+        client = OpenAI(api_key=api_key)
+
+        with open(audio_path, "rb") as audio_file:
+            transcription = client.audio.transcriptions.create(
+                model="gpt-4o-transcribe",
+                file=audio_file
+            )
+
+        return transcription.text
+    except Exception as e:
+        return f"Transcription error: {str(e)}"
+
 def download_example_audio():
     """Download an example audio file for testing"""
     try:
@@ -147,11 +165,24 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
             with gr.Column():
                 text_output = gr.Textbox(label="AI Response (Text)", lines=5)
                 audio_output = gr.Audio(label="AI Response (Audio)")
+                transcribed_output = gr.Textbox(label="Transcription of Audio Response", lines=3)
+
+        # Function to process text input and then transcribe the resulting audio
+        def text_input_with_transcription(api_key, text_prompt, voice):
+            text_response, audio_path = process_text_input(api_key, text_prompt, voice)
+
+            # Get transcription of the generated audio
+            if audio_path:
+                transcription = transcribe_audio(api_key, audio_path)
+            else:
+                transcription = "No audio generated to transcribe."
+
+            return text_response, audio_path, transcription
 
         text_submit.click(
-            fn=process_text_input,
+            fn=text_input_with_transcription,
             inputs=[api_key, text_input, text_voice],
-            outputs=[text_output, audio_output]
+            outputs=[text_output, audio_output, transcribed_output]
         )
 
     with gr.Tab("Audio Input to Audio Response"):
@@ -179,11 +210,30 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
             with gr.Column():
                 audio_text_output = gr.Textbox(label="AI Response (Text)", lines=5)
                 audio_audio_output = gr.Audio(label="AI Response (Audio)")
+                audio_transcribed_output = gr.Textbox(label="Transcription of Audio Response", lines=3)
+                input_transcription = gr.Textbox(label="Transcription of Input Audio", lines=3)
+
+        # Function to process audio input, generate response, and provide transcriptions
+        def audio_input_with_transcription(api_key, audio_path, text_prompt, voice):
+            # First transcribe the input audio
+            input_transcription = "N/A"
+            if audio_path:
+                input_transcription = transcribe_audio(api_key, audio_path)
+
+            # Process the audio input and get response
+            text_response, response_audio_path = process_audio_input(api_key, audio_path, text_prompt, voice)
+
+            # Transcribe the response audio
+            response_transcription = "No audio generated to transcribe."
+            if response_audio_path:
+                response_transcription = transcribe_audio(api_key, response_audio_path)
+
+            return text_response, response_audio_path, response_transcription, input_transcription
 
         audio_submit.click(
-            fn=process_audio_input,
+            fn=audio_input_with_transcription,
             inputs=[api_key, audio_input, accompanying_text, audio_voice],
-            outputs=[audio_text_output, audio_audio_output]
+            outputs=[audio_text_output, audio_audio_output, audio_transcribed_output, input_transcription]
         )
 
         example_btn.click(
@@ -198,7 +248,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
         def generate_voice_sample(api_key, voice_type):
             try:
                 if not api_key:
-                    return "Please enter your OpenAI API key first.", None
+                    return "Please enter your OpenAI API key first.", None, "No transcription available."
 
                 client = OpenAI(api_key=api_key)
                 completion = client.chat.completions.create(
@@ -219,9 +269,12 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
                 with open(temp_path, "wb") as f:
                     f.write(wav_bytes)
 
-                return f"Sample generated with voice: {voice_type}", temp_path
+                # Get transcription
+                transcription = transcribe_audio(api_key, temp_path)
+
+                return f"Sample generated with voice: {voice_type}", temp_path, transcription
             except Exception as e:
-                return f"Error: {str(e)}", None
+                return f"Error: {str(e)}", None, "No transcription available."
 
         with gr.Row():
             sample_voice = gr.Dropdown(
@@ -234,19 +287,21 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
         with gr.Row():
             sample_text = gr.Textbox(label="Status")
             sample_audio = gr.Audio(label="Voice Sample")
+            sample_transcription = gr.Textbox(label="Transcription", lines=3)
 
         sample_btn.click(
            fn=generate_voice_sample,
            inputs=[api_key, sample_voice],
-           outputs=[sample_text, sample_audio]
+           outputs=[sample_text, sample_audio, sample_transcription]
         )
 
     gr.Markdown("""
    ## Notes:
    - You must provide your OpenAI API key in the field above
-    - The model used is `gpt-4o-audio-preview`
+    - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
    - Audio inputs should be in WAV format
    - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
+    - Each audio response is automatically transcribed for verification
    """)
 
 if __name__ == "__main__":
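For reference, a minimal sketch of the round trip this commit wires into the Gradio UI: request a spoken reply from `gpt-4o-audio-preview`, save the WAV, then run the new `gpt-4o-transcribe` verification step. The prompt, voice, and file name are illustrative, and the API key is assumed to come from an `OPENAI_API_KEY` environment variable rather than the app's textbox.

```python
import base64
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])  # assumed env var, not the app's textbox

# 1. Request a spoken reply (same model and voice options app.py exposes).
completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)

# 2. Decode the base64 WAV payload, as app.py does before writing temp_path.
wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
with open("reply.wav", "wb") as f:  # illustrative path
    f.write(wav_bytes)

# 3. Transcribe the generated audio -- the verification step this commit adds.
with open("reply.wav", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )

print(transcription.text)
```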