# import os
# from fastrtc import (ReplyOnPause, Stream, get_stt_model, get_tts_model)
# from openai import OpenAI
#
# sambanova_client = OpenAI(
#     api_key="5dd0312c-a759-42e8-a21d-6bbe275e7aa2", base_url="https://api.sambanova.ai/v1"
# )
# stt_model = get_stt_model()
# tts_model = get_tts_model()
#
# def echo(audio):
#     # Transcribe the caller's speech, send it to the LLM, and stream the reply back as audio.
#     prompt = stt_model.stt(audio)
#     response = sambanova_client.chat.completions.create(
#         model="Meta-Llama-3.2-3B-Instruct",
#         messages=[{"role": "user", "content": prompt}],
#         max_tokens=200,
#     )
#     prompt = response.choices[0].message.content
#     for audio_chunk in tts_model.stream_tts_sync(prompt):
#         yield audio_chunk
#
# stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")
# stream.ui.launch()

from collections.abc import Generator
import socket

from fastrtc import Stream, ReplyOnPause
import numpy as np
from pydub import AudioSegment
from pydub.utils import which

# Point pydub at the ffmpeg binary found on the PATH.
AudioSegment.ffmpeg = which("ffmpeg")


def find_free_port() -> int:
    """Ask the OS for an unused TCP port and return its number."""
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('', 0))  # Port 0 lets the OS pick any available port
    port = s.getsockname()[1]  # Read back the port that was assigned
    s.close()
    return port


available_port = find_free_port()


def echo(audio: tuple[int, np.ndarray]) -> Generator[tuple[int, np.ndarray], None, None]:
    # Minimal handler: yield the incoming (sample_rate, samples) chunk back unchanged.
    yield audio


stream = Stream(ReplyOnPause(echo), modality="audio", mode="send-receive")
stream.ui.launch(share=True, server_port=available_port)