Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,19 @@ EXAMPLE_AUDIO_URLS = [
|
|
22 |
"https://cdn.openai.com/API/docs/audio/shimmer.wav"
|
23 |
]
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def process_text_input(api_key, text_prompt, selected_voice):
|
26 |
"""Generate audio response from text input"""
|
27 |
try:
|
@@ -128,6 +141,24 @@ def transcribe_audio(api_key, audio_path):
|
|
128 |
except Exception as e:
|
129 |
return f"Transcription error: {str(e)}"
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
def download_example_audio():
|
132 |
"""Download a random example audio file for testing"""
|
133 |
try:
|
@@ -315,14 +346,63 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
|
|
315 |
outputs=[sample_text, sample_audio, sample_transcription]
|
316 |
)
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
gr.Markdown("""
|
319 |
## Notes:
|
320 |
- You must provide your OpenAI API key in the field above
|
321 |
-
- The model used is `gpt-4o-audio-preview` for conversation
|
322 |
-
- Audio inputs should be in WAV format
|
323 |
- Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
|
324 |
- Each audio response is automatically transcribed for verification
|
325 |
- The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
|
|
|
326 |
""")
|
327 |
|
328 |
if __name__ == "__main__":
|
|
|
22 |
"https://cdn.openai.com/API/docs/audio/shimmer.wav"
|
23 |
]
|
24 |
|
25 |
+
# Languages advertised for the translation feature. The list is joined with
# ", " and rendered as Markdown in the "Supported Languages" accordion, so the
# order here is the order shown to the user.
SUPPORTED_LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese",
    "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay",
    "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese",
    "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili",
    "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
    "Vietnamese", "Welsh",
]
|
37 |
+
|
38 |
def process_text_input(api_key, text_prompt, selected_voice):
|
39 |
"""Generate audio response from text input"""
|
40 |
try:
|
|
|
141 |
except Exception as e:
|
142 |
return f"Transcription error: {str(e)}"
|
143 |
|
144 |
+
def translate_audio(api_key, audio_path):
    """Translate the speech in an audio file to English text.

    Sends the file to OpenAI's audio translations endpoint using the
    ``whisper-1`` model.

    Args:
        api_key: OpenAI API key used to construct the client.
        audio_path: Path of the audio file to translate. A falsy value
            (``None`` or ``""``) is treated as "no audio supplied".

    Returns:
        The translated text on success. This function does not raise:
        a missing path returns a fixed notice, and any exception is
        returned as a ``"Translation error: ..."`` string so the caller
        can show it directly in the UI.
    """
    try:
        if not audio_path:
            return "No audio file provided for translation."

        client = OpenAI(api_key=api_key)

        # Binary mode: the file is uploaded as-is, no text decoding.
        with open(audio_path, "rb") as audio_file:
            translation = client.audio.translations.create(
                model="whisper-1",
                file=audio_file,
            )

        return translation.text
    except Exception as e:
        # Deliberately broad: this is a UI boundary, and client, file and
        # network failures are all reported to the user as text.
        return f"Translation error: {e}"
|
161 |
+
|
162 |
def download_example_audio():
|
163 |
"""Download a random example audio file for testing"""
|
164 |
try:
|
|
|
346 |
outputs=[sample_text, sample_audio, sample_transcription]
|
347 |
)
|
348 |
|
349 |
+
# New tab for audio translation
# NOTE(review): indentation was lost in this view; the nesting below (button in
# the left column, handler and accordion inside the tab) is inferred from
# statement order — confirm against the real file.
with gr.Tab("Audio Translation"):
    gr.Markdown("## Translate audio from other languages to English")
    gr.Markdown("Supports 50+ languages including: Arabic, Chinese, French, German, Japanese, Spanish, and many more.")

    with gr.Row():
        # Left column: audio source and the trigger button.
        with gr.Column():
            translation_audio_input = gr.Audio(
                label="Audio to Translate",
                type="filepath",  # the handler receives a path on disk
                sources=["microphone", "upload"],
            )

            translate_btn = gr.Button("Translate to English")

        # Right column: the two text results.
        with gr.Column():
            translation_output = gr.Textbox(label="English Translation", lines=5)
            original_transcription = gr.Textbox(label="Original Transcription (if available)", lines=5)

    def translate_audio_input(api_key, audio_path):
        """Handle a click on the translate button.

        Args:
            api_key: Value of the API-key input component.
            audio_path: File path supplied by the audio component, or a
                falsy value when nothing was recorded or uploaded.

        Returns:
            A ``(translation, original)`` pair of strings, bound to the
            "English Translation" and "Original Transcription" textboxes.
            Errors are returned as text in the pair, never raised.
        """
        try:
            if not audio_path:
                return "Please upload or record audio first.", "No audio to transcribe."

            # Get the translation (translate_audio returns its own error text).
            translation = translate_audio(api_key, audio_path)

            # Try to get original transcription (this might be in the original language).
            # The visible tail of transcribe_audio returns "Transcription error: ..."
            # on failure, so this guard is presumably only a safety net.
            try:
                original = transcribe_audio(api_key, audio_path)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                original = "Could not transcribe original audio."

            return translation, original
        except Exception as e:
            return f"Translation error: {str(e)}", "Error occurred during processing."

    translate_btn.click(
        fn=translate_audio_input,
        inputs=[api_key, translation_audio_input],
        outputs=[translation_output, original_transcription],
    )

    # Show supported languages
    with gr.Accordion("Supported Languages", open=False):
        gr.Markdown(", ".join(SUPPORTED_LANGUAGES))
|
396 |
+
|
397 |
gr.Markdown("""
|
398 |
## Notes:
|
399 |
- You must provide your OpenAI API key in the field above
|
400 |
+
- The model used is `gpt-4o-audio-preview` for conversation, `gpt-4o-transcribe` for transcriptions, and `whisper-1` for translations
|
401 |
+
- Audio inputs should be in WAV format for chat and any supported format for translation
|
402 |
- Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
|
403 |
- Each audio response is automatically transcribed for verification
|
404 |
- The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
|
405 |
+
- The translation feature supports 50+ languages, translating them to English
|
406 |
""")
|
407 |
|
408 |
if __name__ == "__main__":
|