shukdevdatta123 committed
Commit d11d2e2 · verified · 1 Parent(s): 39187a7

Update app.py

Files changed (1):
  1. app.py (+82 -2)
app.py CHANGED
@@ -22,6 +22,19 @@ EXAMPLE_AUDIO_URLS = [
     "https://cdn.openai.com/API/docs/audio/shimmer.wav"
 ]
 
+# Supported languages for translation
+SUPPORTED_LANGUAGES = [
+    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
+    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
+    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
+    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese",
+    "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay",
+    "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese",
+    "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili",
+    "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
+    "Vietnamese", "Welsh"
+]
+
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
@@ -128,6 +141,24 @@ def transcribe_audio(api_key, audio_path):
     except Exception as e:
         return f"Transcription error: {str(e)}"
 
+def translate_audio(api_key, audio_path):
+    """Translate audio to English using OpenAI's Whisper model"""
+    try:
+        if not audio_path:
+            return "No audio file provided for translation."
+
+        client = OpenAI(api_key=api_key)
+
+        with open(audio_path, "rb") as audio_file:
+            translation = client.audio.translations.create(
+                model="whisper-1",
+                file=audio_file
+            )
+
+        return translation.text
+    except Exception as e:
+        return f"Translation error: {str(e)}"
+
 def download_example_audio():
     """Download a random example audio file for testing"""
     try:
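The new `translate_audio` helper wraps OpenAI's audio translations endpoint, which auto-detects the spoken language and always returns English text; `whisper-1` is the only model this endpoint accepts. A minimal standalone sketch of the same call (the file name and environment-variable key here are illustrative, not part of the commit):

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# "speech_fr.wav" is a stand-in for any local audio file.
with open("speech_fr.wav", "rb") as audio_file:
    translation = client.audio.translations.create(
        model="whisper-1",  # the only model the translations endpoint accepts
        file=audio_file,
    )

print(translation.text)  # English text, whatever language was spoken
```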
@@ -315,14 +346,63 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
             outputs=[sample_text, sample_audio, sample_transcription]
         )
 
+    # New tab for audio translation
+    with gr.Tab("Audio Translation"):
+        gr.Markdown("## Translate audio from other languages to English")
+        gr.Markdown("Supports 50+ languages including: Arabic, Chinese, French, German, Japanese, Spanish, and many more.")
+
+        with gr.Row():
+            with gr.Column():
+                translation_audio_input = gr.Audio(
+                    label="Audio to Translate",
+                    type="filepath",
+                    sources=["microphone", "upload"]
+                )
+
+                translate_btn = gr.Button("Translate to English")
+
+            with gr.Column():
+                translation_output = gr.Textbox(label="English Translation", lines=5)
+                original_transcription = gr.Textbox(label="Original Transcription (if available)", lines=5)
+
+        def translate_audio_input(api_key, audio_path):
+            """Handle the translation of uploaded audio"""
+            try:
+                if not audio_path:
+                    return "Please upload or record audio first.", "No audio to transcribe."
+
+                # Get the translation
+                translation = translate_audio(api_key, audio_path)
+
+                # Try to get original transcription (this might be in the original language)
+                try:
+                    original = transcribe_audio(api_key, audio_path)
+                except:
+                    original = "Could not transcribe original audio."
+
+                return translation, original
+            except Exception as e:
+                return f"Translation error: {str(e)}", "Error occurred during processing."
+
+        translate_btn.click(
+            fn=translate_audio_input,
+            inputs=[api_key, translation_audio_input],
+            outputs=[translation_output, original_transcription]
+        )
+
+        # Show supported languages
+        with gr.Accordion("Supported Languages", open=False):
+            gr.Markdown(", ".join(SUPPORTED_LANGUAGES))
+
     gr.Markdown("""
     ## Notes:
     - You must provide your OpenAI API key in the field above
-    - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs should be in WAV format
+    - The model used is `gpt-4o-audio-preview` for conversation, `gpt-4o-transcribe` for transcriptions, and `whisper-1` for translations
+    - Audio inputs should be in WAV format for chat and any supported format for translation
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     - The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
+    - The translation feature supports 50+ languages, translating them to English
     """)
 
 if __name__ == "__main__":
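A detail worth noting in the tab wiring: `translate_audio_input` returns a 2-tuple, which Gradio maps positionally onto the two components listed in `outputs`. A self-contained sketch of that `Button.click` pattern, with illustrative component names not taken from the commit:

```python
import gradio as gr

def echo_twice(text):
    # Two return values map, in order, onto the two output components.
    return text, text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    out1 = gr.Textbox(label="As-is")
    out2 = gr.Textbox(label="Uppercased")
    btn.click(fn=echo_twice, inputs=inp, outputs=[out1, out2])

demo.launch()
```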
 
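The updated notes lean on a distinction between the two speech endpoints: transcription (`gpt-4o-transcribe`) returns text in the language that was spoken, while translation (`whisper-1`) always returns English. A minimal sketch contrasting the two calls, assuming a hypothetical German sample file:

```python
from openai import OpenAI

client = OpenAI(api_key="sk-...")  # placeholder; use a real key

# "speech_de.wav" is a hypothetical German-language recording.
with open("speech_de.wav", "rb") as f:
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe", file=f
    )

with open("speech_de.wav", "rb") as f:
    english = client.audio.translations.create(
        model="whisper-1", file=f
    )

print(transcript.text)  # German text (source language preserved)
print(english.text)     # English translation
```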