Spaces:

shukdevdatta123
/

WaveTalk

Running

App Files Files Community

shukdevdatta123 committed 9 days ago

Commit

d11d2e2

verified ·

1 Parent(s): 39187a7

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -2

app.py CHANGED Viewed

@@ -22,6 +22,19 @@ EXAMPLE_AUDIO_URLS = [
     "https://cdn.openai.com/API/docs/audio/shimmer.wav"
 ]
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
@@ -128,6 +141,24 @@ def transcribe_audio(api_key, audio_path):
     except Exception as e:
         return f"Transcription error: {str(e)}"
 def download_example_audio():
     """Download a random example audio file for testing"""
     try:
@@ -315,14 +346,63 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
             outputs=[sample_text, sample_audio, sample_transcription]
         )
     gr.Markdown("""
     ## Notes:
     - You must provide your OpenAI API key in the field above
-    - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs should be in WAV format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     - The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
     """)
 if __name__ == "__main__":

     "https://cdn.openai.com/API/docs/audio/shimmer.wav"
 ]
+# Supported languages for translation; joined into the "Supported Languages" accordion text (display only in the code visible here)
+SUPPORTED_LANGUAGES = [
+    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
+    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
+    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
+    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese",
+    "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay",
+    "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese",
+    "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili",
+    "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
+    "Vietnamese", "Welsh"
+]
 def process_text_input(api_key, text_prompt, selected_voice):
     """Generate audio response from text input"""
     try:
     except Exception as e:
         return f"Transcription error: {str(e)}"
+def translate_audio(api_key, audio_path):
+    """Translate audio to English using OpenAI's Whisper model"""
+    try:
+        if not audio_path:
+            return "No audio file provided for translation."
+        client = OpenAI(api_key=api_key)
+        with open(audio_path, "rb") as audio_file:
+            translation = client.audio.translations.create(
+                model="whisper-1",
+                file=audio_file
+            )
+        return translation.text
+    except Exception as e:
+        return f"Translation error: {str(e)}"
 def download_example_audio():
     """Download a random example audio file for testing"""
     try:
             outputs=[sample_text, sample_audio, sample_transcription]
         )
+    # New tab for audio translation
+    with gr.Tab("Audio Translation"):
+        gr.Markdown("## Translate audio from other languages to English")
+        gr.Markdown("Supports 50+ languages including: Arabic, Chinese, French, German, Japanese, Spanish, and many more.")
+        with gr.Row():
+            with gr.Column():
+                translation_audio_input = gr.Audio(
+                    label="Audio to Translate",
+                    type="filepath",
+                    sources=["microphone", "upload"]
+                )
+                translate_btn = gr.Button("Translate to English")
+            with gr.Column():
+                translation_output = gr.Textbox(label="English Translation", lines=5)
+                original_transcription = gr.Textbox(label="Original Transcription (if available)", lines=5)
+        def translate_audio_input(api_key, audio_path):
+            """Handle the translation of uploaded audio"""
+            try:
+                if not audio_path:
+                    return "Please upload or record audio first.", "No audio to transcribe."
+                # Get the translation
+                translation = translate_audio(api_key, audio_path)
+                # Try to get original transcription (this might be in the original language)
+                try:
+                    original = transcribe_audio(api_key, audio_path)
+                except:
+                    original = "Could not transcribe original audio."
+                return translation, original
+            except Exception as e:
+                return f"Translation error: {str(e)}", "Error occurred during processing."
+        translate_btn.click(
+            fn=translate_audio_input,
+            inputs=[api_key, translation_audio_input],
+            outputs=[translation_output, original_transcription]
+        )
+        # Show supported languages
+        with gr.Accordion("Supported Languages", open=False):
+            gr.Markdown(", ".join(SUPPORTED_LANGUAGES))
     gr.Markdown("""
     ## Notes:
     - You must provide your OpenAI API key in the field above
+    - The model used is `gpt-4o-audio-preview` for conversation, `gpt-4o-transcribe` for transcriptions, and `whisper-1` for translations
+    - Audio inputs should be in WAV format for chat and any supported format for translation
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     - The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
+    - The translation feature supports 50+ languages, translating them to English
     """)
 if __name__ == "__main__":