# qa.py
import os
import json
import tempfile
from typing import Optional, Tuple

import requests
import streamlit as st

from utils import generate_audio_mp3, call_groq_api_for_qa  # Reuse your existing TTS / LLM helpers


def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Send a local audio file to Deepgram for speech-to-text.
    Returns the transcript text on success, or raises on failure.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # For WAV uploads -> "audio/wav". If the user uploads MP3, use "audio/mpeg" instead.
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }

    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f)
    response.raise_for_status()
    data = response.json()

    # Extract the transcript from Deepgram's response structure
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript


def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Minimal function that calls your LLM (Groq) to answer a follow-up question.
    Returns a Python dict, e.g. {"speaker": "John", "text": "..."}.
    """
    system_prompt = f"""
You are John, the guest speaker. The user is asking a follow-up question.

Conversation so far:
{conversation_so_far}

New user question:
{user_question}

Please respond in JSON with keys "speaker" and "text", e.g.:
{{ "speaker": "John", "text": "Sure, here's my answer..." }}
"""
    raw_json_response = call_groq_api_for_qa(system_prompt)
    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
    response_dict = json.loads(raw_json_response)
    return response_dict


def handle_qa_exchange(user_question: str) -> Tuple[Optional[bytes], str]:
    """
    1) Read conversation_so_far from session_state
    2) Call the LLM for a short follow-up answer
    3) Generate TTS audio
    4) Return (audio_bytes, answer_text)
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Append the new exchange to the running conversation history
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS: synthesize the answer, then read the resulting file back as bytes
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)
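

# ---------------------------------------------------------------------------
# For reference, a minimal sketch of what the call_groq_api_for_qa helper in
# utils.py *might* look like. This is an assumption for illustration, not the
# project's actual implementation: the GROQ_API_KEY env var, the model name,
# and the helper name _example_call_groq_api_for_qa are all hypothetical.
# It targets Groq's OpenAI-compatible chat completions endpoint and requests
# a JSON object so the json.loads() call above has something parseable.
def _example_call_groq_api_for_qa(system_prompt: str) -> str:
    groq_api_key = os.environ.get("GROQ_API_KEY")  # assumed env var name
    if not groq_api_key:
        raise ValueError("Groq API key not found in environment variables.")

    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {groq_api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": "llama-3.3-70b-versatile",  # assumed model; swap for yours
            "messages": [{"role": "system", "content": system_prompt}],
            "response_format": {"type": "json_object"},  # force valid JSON output
        },
        timeout=60,
    )
    response.raise_for_status()
    # OpenAI-compatible schema: the text lives at choices[0].message.content
    return response.json()["choices"][0]["message"]["content"]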
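

# ---------------------------------------------------------------------------
# A hedged usage sketch showing how these helpers could be wired into a
# Streamlit page. This widget function is hypothetical (not part of the
# module proper): it assumes the rest of the app seeds
# st.session_state["conversation_history"], and it spools the upload to a
# temp file because transcribe_audio_deepgram expects a local path.
def _example_qa_widget() -> None:
    st.subheader("Ask a follow-up question")

    uploaded = st.file_uploader("Upload a question (WAV)", type=["wav"])
    typed = st.text_input("...or type your question")

    question = typed
    if uploaded is not None:
        # Persist the upload to disk so Deepgram can read it from a path
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(uploaded.read())
            tmp_path = tmp.name
        question = transcribe_audio_deepgram(tmp_path)

    if st.button("Ask") and question.strip():
        audio_bytes, answer_text = handle_qa_exchange(question)
        st.markdown(f"**John:** {answer_text}")
        if audio_bytes:
            st.audio(audio_bytes, format="audio/mp3")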