Spaces:
Running
on
Zero
Running
on
Zero
Freddy Boulton
committed on
Commit
·
dbbe3da
1
Parent(s):
596ef1a
Fork
Browse files- app.py +53 -7
- requirements.txt +1 -0
app.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Optional, Tuple
|
|
5 |
import spaces
|
6 |
|
7 |
import gradio as gr
|
|
|
8 |
import numpy as np
|
9 |
import soundfile as sf
|
10 |
import torch
|
@@ -218,7 +219,11 @@ css = """
|
|
218 |
#col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
|
219 |
"""
|
220 |
# Attempt to load default text from example.txt
|
221 |
-
default_text = "
|
|
|
|
|
|
|
|
|
222 |
example_txt_path = Path("./example.txt")
|
223 |
if example_txt_path.exists():
|
224 |
try:
|
@@ -229,18 +234,47 @@ if example_txt_path.exists():
|
|
229 |
print(f"Warning: Could not read example.txt: {e}")
|
230 |
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
# Build Gradio UI
|
233 |
with gr.Blocks(css=css) as demo:
|
234 |
gr.Markdown("# Nari Text-to-Speech Synthesis")
|
235 |
|
236 |
with gr.Row(equal_height=False):
|
237 |
with gr.Column(scale=1):
|
238 |
-
text_input =
|
239 |
-
|
240 |
-
|
|
|
241 |
value=default_text,
|
242 |
-
lines=5, # Increased lines
|
243 |
)
|
|
|
244 |
audio_prompt_input = gr.Audio(
|
245 |
label="Audio Prompt (Optional)",
|
246 |
show_label=True,
|
@@ -327,7 +361,11 @@ with gr.Blocks(css=css) as demo:
|
|
327 |
example_prompt_path = "./example_prompt.mp3" # Adjust if needed
|
328 |
examples_list = [
|
329 |
[
|
330 |
-
"
|
|
|
|
|
|
|
|
|
331 |
None,
|
332 |
3072,
|
333 |
3.0,
|
@@ -337,7 +375,15 @@ with gr.Blocks(css=css) as demo:
|
|
337 |
0.94,
|
338 |
],
|
339 |
[
|
340 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
example_prompt_path if Path(example_prompt_path).exists() else None,
|
342 |
3072,
|
343 |
3.0,
|
|
|
5 |
import spaces
|
6 |
|
7 |
import gradio as gr
|
8 |
+
from gradio_dialogue import Dialogue
|
9 |
import numpy as np
|
10 |
import soundfile as sf
|
11 |
import torch
|
|
|
219 |
#col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
|
220 |
"""
|
221 |
# Attempt to load default text from example.txt
|
222 |
+
default_text = [{"speaker": "Speaker 1", "text": "Dia is an open weights text to dialogue model."},
|
223 |
+
{"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
|
224 |
+
{"speaker": "Speaker 1", "text": "Wow. Amazing. (laughs)"},
|
225 |
+
{"speaker": "Speaker 2", "text": "Try it now on Git hub or Hugging Face."},
|
226 |
+
]
|
227 |
example_txt_path = Path("./example.txt")
|
228 |
if example_txt_path.exists():
|
229 |
try:
|
|
|
234 |
print(f"Warning: Could not read example.txt: {e}")
|
235 |
|
236 |
|
237 |
+
def formatter(speaker, text):
|
238 |
+
speaker = speaker.split(" ")[1]
|
239 |
+
return f"[S{speaker}] {text}"
|
240 |
+
|
241 |
+
emotions = [
|
242 |
+
"(laughs)",
|
243 |
+
"(clears throat)",
|
244 |
+
"(sighs)",
|
245 |
+
"(gasps)",
|
246 |
+
"(coughs)",
|
247 |
+
"(singing)",
|
248 |
+
"(sings)",
|
249 |
+
"(mumbles)",
|
250 |
+
"(beep)",
|
251 |
+
"(groans)",
|
252 |
+
"(sniffs)",
|
253 |
+
"(claps)",
|
254 |
+
"(screams)",
|
255 |
+
"(inhales)",
|
256 |
+
"(exhales)",
|
257 |
+
"(applause)",
|
258 |
+
"(burps)",
|
259 |
+
"(humming)",
|
260 |
+
"(sneezes)",
|
261 |
+
"(chuckle)",
|
262 |
+
"(whistles)",
|
263 |
+
]
|
264 |
+
|
265 |
# Build Gradio UI
|
266 |
with gr.Blocks(css=css) as demo:
|
267 |
gr.Markdown("# Nari Text-to-Speech Synthesis")
|
268 |
|
269 |
with gr.Row(equal_height=False):
|
270 |
with gr.Column(scale=1):
|
271 |
+
text_input = Dialogue(
|
272 |
+
speakers=["S1", "S2"],
|
273 |
+
emotions=emotions,
|
274 |
+
formatter=formatter,
|
275 |
value=default_text,
|
|
|
276 |
)
|
277 |
+
|
278 |
audio_prompt_input = gr.Audio(
|
279 |
label="Audio Prompt (Optional)",
|
280 |
show_label=True,
|
|
|
361 |
example_prompt_path = "./example_prompt.mp3" # Adjust if needed
|
362 |
examples_list = [
|
363 |
[
|
364 |
+
[{"speaker": "Speaker 1", "text": "Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct!"},
|
365 |
+
{"speaker": "Speaker 2", "text": "Oh my god! Okay.. it's happening. Everybody stay calm!"},
|
366 |
+
{"speaker": "Speaker 1", "text": "What's the procedure..."},
|
367 |
+
{"speaker": "Speaker 2", "text": "Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway!"},
|
368 |
+
],
|
369 |
None,
|
370 |
3072,
|
371 |
3.0,
|
|
|
375 |
0.94,
|
376 |
],
|
377 |
[
|
378 |
+
[{"speaker": "Speaker 1", "text": "Open weights text to dialogue model."},
|
379 |
+
{"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
|
380 |
+
{"speaker": "Speaker 1", "text": "I'm biased, but I think we clearly won."},
|
381 |
+
{"speaker": "Speaker 2", "text": "Hard to disagree. (laughs)"},
|
382 |
+
{"speaker": "Speaker 1", "text": "Thanks for listening to this demo."},
|
383 |
+
{"speaker": "Speaker 2", "text": "Try it now on Git hub and Hugging Face."},
|
384 |
+
{"speaker": "Speaker 1", "text": "If you liked our model, please give us a star and share to your friends."},
|
385 |
+
{"speaker": "Speaker 2", "text": "This was Nari Labs."},
|
386 |
+
],
|
387 |
example_prompt_path if Path(example_prompt_path).exists() else None,
|
388 |
3072,
|
389 |
3.0,
|
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ pydantic>=2.11.3
|
|
6 |
soundfile>=0.13.1
|
7 |
torchaudio>=2.0.0
|
8 |
torch>=2.0.0
|
|
|
|
6 |
soundfile>=0.13.1
|
7 |
torchaudio>=2.0.0
|
8 |
torch>=2.0.0
|
9 |
+
gradio-dialogue>=0.0.4
|