Spaces:
Running
Running
import gradio as gr | |
import httpx | |
from gradio_dialogue import Dialogue | |
# Non-verbal cue tags handed to the Dialogue component as its ``emotions``
# option. Each tag is the cue name wrapped in parentheses, e.g. "(laughs)".
emotions = [
    f"({cue})"
    for cue in (
        "laughs",
        "clears throat",
        "sighs",
        "gasps",
        "coughs",
        "singing",
        "sings",
        "mumbles",
        "beep",
        "groans",
        "sniffs",
        "claps",
        "screams",
        "inhales",
        "exhales",
        "applause",
        "burps",
        "humming",
        "sneezes",
        "chuckle",
        "whistles",
    )
]

# Speaker labels offered in the Dialogue component; ``formatter`` relies on
# the "<word> <number>" shape of these labels.
speakers = [f"Speaker {number}" for number in (1, 2)]
# Shared async HTTP client reused by every call to ``query``; requests time
# out after 180 seconds.
client = httpx.AsyncClient(timeout=180)


async def query(dialogue: str, token: gr.OAuthToken | None):
    """Post the dialogue text to the Dia TTS endpoint and return the audio URL.

    Args:
        dialogue: Text sent as the ``text`` field of the JSON request body.
        token: OAuth token of the signed-in user, or ``None`` when nobody is
            signed in. Its ``token`` attribute is sent as a Bearer credential.

    Returns:
        The value found at ``["audio"]["url"]`` in the JSON response.

    Raises:
        gr.Error: If no token is available, the request fails or times out,
            the endpoint answers with an error status, or the response body
            does not contain an audio URL.
    """
    if token is None:
        raise gr.Error(
            "No token provided. Use Sign in with Hugging Face to get a token."
        )

    API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts"
    headers = {
        "Authorization": f"Bearer {token.token}",
    }

    try:
        response = await client.post(
            API_URL, headers=headers, json={"text": dialogue}
        )
        # Turn 4xx/5xx replies into exceptions instead of trying to parse an
        # error payload as if it were a successful result.
        response.raise_for_status()
    except httpx.HTTPStatusError as err:
        raise gr.Error(
            f"Audio generation request failed with HTTP {err.response.status_code}."
        ) from err
    except httpx.HTTPError as err:
        # Covers timeouts and connection/transport failures.
        raise gr.Error(
            f"Could not reach the audio generation service: {err}"
        ) from err

    try:
        url = response.json()["audio"]["url"]
    except (ValueError, KeyError, TypeError) as err:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        raise gr.Error(
            "The audio generation service returned an unexpected response."
        ) from err

    print("URL: ", url)
    return url
def formatter(speaker, text):
    """Render one dialogue line as ``[S<n>] <text>``.

    Args:
        speaker: Speaker label whose second space-separated field is used as
            the speaker id (e.g. ``"Speaker 1"`` yields ``1``).
        text: The line spoken by that speaker.

    Returns:
        The string ``"[S<id>] <text>"``.

    Raises:
        IndexError: If ``speaker`` contains no space.
    """
    fields = speaker.split(" ")
    speaker_id = fields[1]
    return "[S{}] {}".format(speaker_id, text)
# Build the UI: sign-in sidebar, header, dialogue editor next to the audio
# output, and a row of example conversations.
with gr.Blocks() as demo:
    # ``query`` refuses to run without a token, so offer the sign-in button.
    with gr.Sidebar():
        login_button = gr.LoginButton()

    # Page header. The GIF is served through ``allowed_paths`` in ``launch``.
    gr.HTML(
        """
        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
        <img src="/gradio_api/file=dancing_huggy.gif" alt="Dancing Huggy" style="height: 100px; margin-right: 10px"> Dia Dialogue Generation Model
        </h1>
        <h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href="https://huggingface.co/nari-labs/Dia-1.6B"> Nari Labs</a>. Powered by HF and <a href="https://fal.ai/">Fal AI</a> API.</h2>
        <h3>Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life .</h3>
        """
    )

    with gr.Row():
        # Left column: the dialogue editor.
        with gr.Column():
            dialogue = Dialogue(
                speakers=speakers, emotions=emotions, formatter=formatter
            )
        # Right column: generated audio and a share button.
        with gr.Column():
            with gr.Row():
                audio = gr.Audio(label="Audio")
            with gr.Row():
                gr.DeepLinkButton(value="Share Audio via Link")

    # Example conversations that populate the dialogue editor when clicked.
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    [
                        {
                            "speaker": "Speaker 1",
                            "text": "Why did the chicken cross the road?",
                        },
                        {"speaker": "Speaker 2", "text": "I don't know!"},
                        {
                            "speaker": "Speaker 1",
                            "text": "to get to the other side! (laughs)",
                        },
                    ]
                ],
                [
                    [
                        {
                            "speaker": "Speaker 1",
                            "text": "I am a little tired today (sighs).",
                        },
                        {"speaker": "Speaker 2", "text": "Hang in there!"},
                    ]
                ],
            ],
            inputs=[dialogue],
            cache_examples=False,
        )

    # On submit, pass the dialogue value to ``query`` and show the returned
    # URL in the audio player.
    dialogue.submit(query, [dialogue], audio)

# ``allowed_paths`` lets Gradio serve the GIF referenced in the header HTML.
demo.launch(ssr_mode=False, allowed_paths=["dancing_huggy.gif"])