"""Gradio + fastrtc test app.

Streams microphone audio over WebRTC, and for every invocation of the
``transcribe`` handler pushes one randomly chosen canned assistant message
through ``AdditionalOutputs`` into a ``gr.State``; a change listener on that
state then merges the message into the chatbot transcript.
"""

import os
import random
import time

import gradio as gr
import numpy as np
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    WebRTC,
    get_cloudflare_turn_credentials,
    get_cloudflare_turn_credentials_async,
)

# Hugging Face token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")


async def get_credentials():
    """Return the result of fastrtc's async Cloudflare TURN credential helper.

    Passed (uncalled) as ``rtc_configuration`` to the ``WebRTC`` component.
    """
    return await get_cloudflare_turn_credentials_async(hf_token=HF_TOKEN)


# Canned messages; one is picked at random for every handler invocation.
possible_responses = [
    "hello",
    "hi",
    "how's it going?",
    "what's up?",
    "how's your day?",
    "how are you?",
    "what's your name?",
    "where are you from?",
    "what do you do?",
    "what's your favorite color?",
    "what's your favorite food?",
    "what's your favorite movie?",
]


def transcribe(audio: tuple[int, np.ndarray]):
    """Yield one fake assistant message wrapped in ``AdditionalOutputs``.

    Args:
        audio: The audio payload handed over by ``ReplyOnPause``. It is not
            inspected; the reply is chosen at random.

    Yields:
        An ``AdditionalOutputs`` holding a one-element list with a
        ``{"role": "assistant", "content": ...}`` message dict.
    """
    # Fixed one-second delay before replying (stand-in for real processing).
    time.sleep(1)
    transformers_convo = [
        {"role": "assistant", "content": random.choice(possible_responses)}
    ]
    yield AdditionalOutputs(transformers_convo)


with gr.Blocks() as demo:
    gr.HTML(""" TEST """)

    # Intermediate holder for the latest additional output of the stream.
    test_state = gr.State()

    with gr.Row():
        with gr.Column():
            audio = WebRTC(
                label="Stream",
                mode="send-receive",
                modality="audio",
                rtc_configuration=get_credentials,
                server_rtc_configuration=get_cloudflare_turn_credentials(
                    ttl=360_000
                ),
            )
        with gr.Column():
            transcript = gr.Chatbot(label="transcript", type="messages")

    audio.stream(
        ReplyOnPause(transcribe),
        inputs=[audio],
        outputs=[audio],
        time_limit=180,
    )

    def handle_additional_outputs(outputs):
        """Log the additional outputs and pass them through unchanged."""
        print(f"outputs: {outputs}")
        return outputs

    audio.on_additional_outputs(
        fn=handle_additional_outputs,
        outputs=[test_state],
        concurrency_limit=100,
        queue=True,
        show_progress="hidden",
    )

    # `gr.on(...)` called without `fn` only returns a decorator; it has to be
    # applied to the handler, otherwise the listener is never registered and
    # the transcript never updates.
    @gr.on(
        test_state.change,
        inputs=[test_state, transcript],
        outputs=[transcript],
    )
    def state_change(new_messages, history):
        """Merge the freshly received messages into the chatbot history.

        Args:
            new_messages: Current value of ``test_state``.
            history: Current value of the ``transcript`` chatbot.

        Returns:
            ``new_messages + history`` as the new chatbot value.
        """
        print(f"test_state: {new_messages}")
        # NOTE(review): this puts the newest message *before* the existing
        # history; confirm whether `history + new_messages` was intended.
        # NOTE(review): a `.change` listener presumably will not fire when the
        # same canned response is produced twice in a row - confirm.
        return new_messages + history


demo.launch(server_name="0.0.0.0", server_port=7860, share=True)