Try Dialogue Component 2

#11
by freddyaboulton HF Staff - opened
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +54 -8
  3. requirements.txt +1 -2
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: purple
5
  colorTo: yellow
6
  python_version: 3.10
7
  sdk: gradio
8
- sdk_version: 5.25.2
9
  app_file: app.py
10
  pinned: false
11
  short_description: Generate realistic dialogue from a script, using Dia!
 
5
  colorTo: yellow
6
  python_version: 3.10
7
  sdk: gradio
8
+ sdk_version: 5.27.0
9
  app_file: app.py
10
  pinned: false
11
  short_description: Generate realistic dialogue from a script, using Dia!
app.py CHANGED
@@ -5,6 +5,7 @@ from typing import Optional, Tuple
5
  import spaces
6
 
7
  import gradio as gr
 
8
  import numpy as np
9
  import soundfile as sf
10
  import torch
@@ -218,7 +219,11 @@ css = """
218
  #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
219
  """
220
  # Attempt to load default text from example.txt
221
- default_text = "[S1] Dia is an open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] Wow. Amazing. (laughs) \n[S2] Try it now on Git hub or Hugging Face."
 
 
 
 
222
  example_txt_path = Path("./example.txt")
223
  if example_txt_path.exists():
224
  try:
@@ -229,18 +234,47 @@ if example_txt_path.exists():
229
  print(f"Warning: Could not read example.txt: {e}")
230
 
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  # Build Gradio UI
233
  with gr.Blocks(css=css) as demo:
234
  gr.Markdown("# Nari Text-to-Speech Synthesis")
235
 
236
  with gr.Row(equal_height=False):
237
  with gr.Column(scale=1):
238
- text_input = gr.Textbox(
239
- label="Input Text",
240
- placeholder="Enter text here...",
 
241
  value=default_text,
242
- lines=5, # Increased lines
243
  )
 
244
  audio_prompt_input = gr.Audio(
245
  label="Audio Prompt (Optional)",
246
  show_label=True,
@@ -327,7 +361,11 @@ with gr.Blocks(css=css) as demo:
327
  example_prompt_path = "./example_prompt.mp3" # Adjust if needed
328
  examples_list = [
329
  [
330
- "[S1] Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct! \n[S2] Oh my god! Okay.. it's happening. Everybody stay calm! \n[S1] What's the procedure... \n[S2] Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway! ",
 
 
 
 
331
  None,
332
  3072,
333
  3.0,
@@ -337,7 +375,15 @@ with gr.Blocks(css=css) as demo:
337
  0.94,
338
  ],
339
  [
340
- "[S1] Open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] I'm biased, but I think we clearly won. \n[S2] Hard to disagree. (laughs) \n[S1] Thanks for listening to this demo. \n[S2] Try it now on Git hub and Hugging Face. \n[S1] If you liked our model, please give us a star and share to your friends. \n[S2] This was Nari Labs.",
 
 
 
 
 
 
 
 
341
  example_prompt_path if Path(example_prompt_path).exists() else None,
342
  3072,
343
  3.0,
@@ -375,4 +421,4 @@ if __name__ == "__main__":
375
 
376
  # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
377
  # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
378
- demo.launch()
 
5
  import spaces
6
 
7
  import gradio as gr
8
+ from gradio_dialogue import Dialogue
9
  import numpy as np
10
  import soundfile as sf
11
  import torch
 
219
  #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
220
  """
221
  # Attempt to load default text from example.txt
222
+ default_text = [{"speaker": "Speaker 1", "text": "Dia is an open weights text to dialogue model."},
223
+ {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
224
+ {"speaker": "Speaker 1", "text": "Wow. Amazing. (laughs)"},
225
+ {"speaker": "Speaker 2", "text": "Try it now on Git hub or Hugging Face."},
226
+ ]
227
  example_txt_path = Path("./example.txt")
228
  if example_txt_path.exists():
229
  try:
 
234
  print(f"Warning: Could not read example.txt: {e}")
235
 
236
 
237
+ def formatter(speaker, text):
238
+ speaker = speaker.split(" ")[1]
239
+ return f"[S{speaker}] {text}"
240
+
241
+ emotions = [
242
+ "(laughs)",
243
+ "(clears throat)",
244
+ "(sighs)",
245
+ "(gasps)",
246
+ "(coughs)",
247
+ "(singing)",
248
+ "(sings)",
249
+ "(mumbles)",
250
+ "(beep)",
251
+ "(groans)",
252
+ "(sniffs)",
253
+ "(claps)",
254
+ "(screams)",
255
+ "(inhales)",
256
+ "(exhales)",
257
+ "(applause)",
258
+ "(burps)",
259
+ "(humming)",
260
+ "(sneezes)",
261
+ "(chuckle)",
262
+ "(whistles)",
263
+ ]
264
+
265
  # Build Gradio UI
266
  with gr.Blocks(css=css) as demo:
267
  gr.Markdown("# Nari Text-to-Speech Synthesis")
268
 
269
  with gr.Row(equal_height=False):
270
  with gr.Column(scale=1):
271
+ text_input = Dialogue(
272
+ speakers=["Speaker 1", "Speaker 2"],
273
+ emotions=emotions,
274
+ formatter=formatter,
275
  value=default_text,
 
276
  )
277
+
278
  audio_prompt_input = gr.Audio(
279
  label="Audio Prompt (Optional)",
280
  show_label=True,
 
361
  example_prompt_path = "./example_prompt.mp3" # Adjust if needed
362
  examples_list = [
363
  [
364
+ [{"speaker": "Speaker 1", "text": "Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct!"},
365
+ {"speaker": "Speaker 2", "text": "Oh my god! Okay.. it's happening. Everybody stay calm!"},
366
+ {"speaker": "Speaker 1", "text": "What's the procedure..."},
367
+ {"speaker": "Speaker 2", "text": "Everybody stay fucking calm!!!... Everybody fucking calm down!!!!!"}, {"speaker": "Speaker 1", "text": "No! No! If you touch the handle, if its hot there might be a fire down the hallway!"},
368
+ ],
369
  None,
370
  3072,
371
  3.0,
 
375
  0.94,
376
  ],
377
  [
378
+ [{"speaker": "Speaker 1", "text": "Open weights text to dialogue model."},
379
+ {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
380
+ {"speaker": "Speaker 1", "text": "I'm biased, but I think we clearly won."},
381
+ {"speaker": "Speaker 2", "text": "Hard to disagree. (laughs)"},
382
+ {"speaker": "Speaker 1", "text": "Thanks for listening to this demo."},
383
+ {"speaker": "Speaker 2", "text": "Try it now on Git hub and Hugging Face."},
384
+ {"speaker": "Speaker 1", "text": "If you liked our model, please give us a star and share to your friends."},
385
+ {"speaker": "Speaker 2", "text": "This was Nari Labs."},
386
+ ],
387
  example_prompt_path if Path(example_prompt_path).exists() else None,
388
  3072,
389
  3.0,
 
421
 
422
  # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
423
  # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
424
+ demo.launch(ssr_mode=False)
requirements.txt CHANGED
@@ -1,9 +1,8 @@
1
  descript-audio-codec>=1.0.0
2
- gradio>=5.25.2
3
  huggingface-hub>=0.30.2
4
  numpy>=2.2.4
5
  pydantic>=2.11.3
6
  soundfile>=0.13.1
7
  torchaudio>=2.0.0
8
  torch>=2.0.0
9
- gradio-dialogue>=0.0.4
 
1
  descript-audio-codec>=1.0.0
 
2
  huggingface-hub>=0.30.2
3
  numpy>=2.2.4
4
  pydantic>=2.11.3
5
  soundfile>=0.13.1
6
  torchaudio>=2.0.0
7
  torch>=2.0.0
8
+ gradio-dialogue>=0.0.5