"""Minimal audio echo demo built from a FastRTC stream and a Gradio interface."""

from collections.abc import Iterator

import gradio as gr
import numpy as np
from fastrtc import ReplyOnPause, Stream
from pydub import AudioSegment
from pydub.utils import which

# Hand pydub whatever ``ffmpeg`` executable ``which`` locates.
AudioSegment.ffmpeg = which("ffmpeg")


def echo(audio: tuple[int, np.ndarray]) -> Iterator[tuple[int, np.ndarray]]:
    """Yield the received audio back unchanged.

    This is a generator function: calling it returns an iterator that
    produces exactly one item, the input itself.

    Args:
        audio: A ``(sample_rate, samples)`` pair.

    Yields:
        The same ``(sample_rate, samples)`` pair that was passed in.
    """
    yield audio


# FastRTC stream that answers with ``echo``. The Gradio interface below does
# not route audio through this object; it calls ``echo`` directly.
stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")


def process_audio(audio_input):
    """Run one Gradio audio value through ``echo`` and return the result.

    Args:
        audio_input: A ``(sample_rate, numpy array)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when there is no audio.

    Returns:
        A ``(sample_rate, numpy array)`` tuple holding the first chunk
        produced by ``echo``, or ``None`` if ``audio_input`` is ``None``.
    """
    # Nothing to process (e.g. the input component was cleared).
    if audio_input is None:
        return None

    sample_rate, audio_data = audio_input

    # ``echo`` returns a generator, so it cannot be unpacked directly into
    # two names; pull the first yielded chunk and unpack that instead.
    processed_sample_rate, processed_audio = next(echo((sample_rate, audio_data)))

    # Return processed audio to Gradio for output
    return processed_sample_rate, processed_audio


interface = gr.Interface(
    fn=process_audio,  # The function to process audio
    inputs=gr.Audio(type="numpy"),  # Microphone input (audio)
    outputs=gr.Audio(type="numpy"),  # Audio output (processed)
    live=True,  # Make the processing live (if needed)
)

interface.launch()