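"""Gradio demo: generate spoken dialogue with the Dia TTS model (nari-labs/Dia-1.6B).

The app formats a two-speaker conversation, sends it through the Hugging Face
Inference router to the fal.ai Dia TTS endpoint, and plays back the returned audio.
"""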
import gradio as gr
import httpx
from gradio_dialogue import Dialogue

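# Non-verbal cue tags that can be inserted inline into the dialogue text.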
emotions = [
    "(laughs)",
    "(clears throat)",
    "(sighs)",
    "(gasps)",
    "(coughs)",
    "(singing)",
    "(sings)",
    "(mumbles)",
    "(beep)",
    "(groans)",
    "(sniffs)",
    "(claps)",
    "(screams)",
    "(inhales)",
    "(exhales)",
    "(applause)",
    "(burps)",
    "(humming)",
    "(sneezes)",
    "(chuckle)",
    "(whistles)",
]
speakers = ["Speaker 1", "Speaker 2"]

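# Shared async HTTP client; the 180 s timeout leaves room for slow TTS generation.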
client = httpx.AsyncClient(timeout=180)


async def query(dialogue: str, token: gr.OAuthToken | None):
    """Send the formatted dialogue to the Dia TTS endpoint and return the generated audio URL."""
    if token is None:
        raise gr.Error(
            "No token provided. Use Sign in with Hugging Face to get a token."
        )
    # The Hugging Face Inference router forwards the request to the fal.ai Dia TTS provider.
    API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts"
    headers = {
        "Authorization": f"Bearer {token.token}",
    }
    response = await client.post(API_URL, headers=headers, json={"text": dialogue})
    response.raise_for_status()  # surface HTTP errors instead of a KeyError below
    url = response.json()["audio"]["url"]
    print("URL: ", url)
    return url


def formatter(speaker, text):
    # Convert ("Speaker 1", "Hello") into Dia's speaker-tag format, e.g. "[S1] Hello".
    speaker = speaker.split(" ")[1]
    return f"[S{speaker}] {text}"


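# Build the UI: sidebar login, header, dialogue editor, audio output, and examples.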
with gr.Blocks() as demo:
    with gr.Sidebar():
        login_button = gr.LoginButton()
    gr.HTML(
        """
        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
        <img src="/gradio_api/file=dancing_huggy.gif" alt="Dancing Huggy" style="height: 100px; margin-right: 10px"> Dia Dialogue Generation Model
        </h1>
        <h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href="https://huggingface.co./nari-labs/Dia-1.6B">Nari Labs</a>. Powered by the Hugging Face and <a href="https://fal.ai/">Fal AI</a> APIs.</h2>
        <h3>Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation, then hit the submit button in the bottom-right corner to hear it come to life.</h3>
        """
    )
    with gr.Row():
        with gr.Column():
            dialogue = Dialogue(
                speakers=speakers, emotions=emotions, formatter=formatter
            )
        with gr.Column():
            with gr.Row():
                audio = gr.Audio(label="Audio")
            with gr.Row():
                gr.DeepLinkButton(value="Share Audio via Link")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    [
                        {
                            "speaker": "Speaker 1",
                            "text": "Why did the chicken cross the road?",
                        },
                        {"speaker": "Speaker 2", "text": "I don't know!"},
                        {
                            "speaker": "Speaker 1",
                            "text": "to get to the other side! (laughs)",
                        },
                    ]
                ],
                [
                    [
                        {
                            "speaker": "Speaker 1",
                            "text": "I am a little tired today (sighs).",
                        },
                        {"speaker": "Speaker 2", "text": "Hang in there!"},
                    ]
                ],
            ],
            inputs=[dialogue],
            cache_examples=False,
        )

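    # Submitting the dialogue runs `query` and loads the returned audio URL into the player.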
    dialogue.submit(query, [dialogue], audio)

demo.launch(ssr_mode=False, allowed_paths=["dancing_huggy.gif"])