Spaces:

freddyaboulton
/

dia-demo

Running

dia-demo / app.py

Freddy Boulton

Fix

077bff1 4 days ago

3.48 kB

	import gradio as gr
	import httpx
	from gradio_dialogue import Dialogue

	# Speaker labels offered by the Dialogue component; `formatter` reads the
	# number after the space to build the "[S<n>]" tag.
	speakers = ["Speaker 1", "Speaker 2"]

	# Parenthesised non-verbal tags handed to the Dialogue component as its
	# `emotions` choices.
	emotions = [
	    "(laughs)", "(clears throat)", "(sighs)", "(gasps)", "(coughs)",
	    "(singing)", "(sings)", "(mumbles)", "(beep)", "(groans)",
	    "(sniffs)", "(claps)", "(screams)", "(inhales)", "(exhales)",
	    "(applause)", "(burps)", "(humming)", "(sneezes)", "(chuckle)",
	    "(whistles)",
	]

	# Module-level async HTTP client reused by `query` for every request.
	# The timeout of 180 is deliberately long — presumably because audio
	# generation is slow; confirm before lowering it.
	client = httpx.AsyncClient(timeout=180)


	async def query(dialogue: str, token: gr.OAuthToken | None):
	    """Send the dialogue text to the Dia TTS endpoint and return the audio URL.

	    Args:
	        dialogue: Text posted as the ``"text"`` field of the JSON payload
	            (presumably the Dialogue component's formatted output —
	            confirm against the component).
	        token: Hugging Face OAuth token of the signed-in user, or ``None``
	            when nobody is signed in.

	    Returns:
	        The value found at ``["audio"]["url"]`` in the JSON response.

	    Raises:
	        gr.Error: If no token is available, or if the endpoint answers
	            with a 4xx/5xx status.
	    """
	    if token is None:
	        raise gr.Error(
	            "No token provided. Use Sign in with Hugging Face to get a token."
	        )
	    API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts"
	    headers = {
	        "Authorization": f"Bearer {token.token}",
	    }
	    response = await client.post(API_URL, headers=headers, json={"text": dialogue})
	    # Surface HTTP failures as a readable UI error instead of letting the
	    # JSON lookup below fail with an opaque KeyError / decode error.
	    if response.is_error:
	        raise gr.Error(
	            f"Audio generation request failed with status {response.status_code}."
	        )
	    url = response.json()["audio"]["url"]
	    print("URL: ", url)
	    return url


	def formatter(speaker, text):
	    """Render one dialogue line as ``[S<n>] <text>``.

	    ``<n>` is the second space-separated token of ``speaker``
	    (e.g. ``"Speaker 1"`` -> ``"1"``).
	    """
	    tokens = speaker.split(" ")
	    number = tokens[1]
	    return "[S{}] {}".format(number, text)


	# Build the UI: a sidebar with the Hugging Face login button, an HTML
	# header, then a row with the dialogue editor on the left and the audio
	# output, share button and examples on the right.
	with gr.Blocks() as demo:
	    with gr.Sidebar():
	        login_button = gr.LoginButton()
	    # Header markup. The GIF is served through Gradio's file route, which is
	    # why "dancing_huggy.gif" is listed in `allowed_paths` at launch.
	    gr.HTML(
	        """
	        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
	        <img src="/gradio_api/file=dancing_huggy.gif" alt="Dancing Huggy" style="height: 100px; margin-right: 10px"> Dia Dialogue Generation Model
	        </h1>
	        <h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href="https://huggingface.co/nari-labs/Dia-1.6B">Nari Labs</a>. Powered by HF and <a href="https://fal.ai/">Fal AI</a> API.</h2>
	        <h3>Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life.</h3>
	        """
	    )
	    with gr.Row():
	        with gr.Column():
	            dialogue = Dialogue(
	                speakers=speakers, emotions=emotions, formatter=formatter
	            )
	        with gr.Column():
	            with gr.Row():
	                audio = gr.Audio(label="Audio")
	            with gr.Row():
	                gr.DeepLinkButton(value="Share Audio via Link")
	            with gr.Row():
	                # Each example is one value for the single `dialogue` input:
	                # a list of {"speaker", "text"} turns.
	                gr.Examples(
	                    examples=[
	                        [
	                            [
	                                {
	                                    "speaker": "Speaker 1",
	                                    "text": "Why did the chicken cross the road?",
	                                },
	                                {"speaker": "Speaker 2", "text": "I don't know!"},
	                                {
	                                    "speaker": "Speaker 1",
	                                    "text": "to get to the other side! (laughs)",
	                                },
	                            ]
	                        ],
	                        [
	                            [
	                                {
	                                    "speaker": "Speaker 1",
	                                    "text": "I am a little tired today (sighs).",
	                                },
	                                {"speaker": "Speaker 2", "text": "Hang in there!"},
	                            ]
	                        ],
	                    ],
	                    inputs=[dialogue],
	                    cache_examples=False,
	                )

	    # Only `dialogue` is wired as an input; `query`'s `token` parameter is
	    # expected to be supplied by Gradio from its OAuth type annotation
	    # (NOTE(review): confirm against the Gradio OAuth docs).
	    dialogue.submit(query, [dialogue], audio)

	demo.launch(ssr_mode=False, allowed_paths=["dancing_huggy.gif"])