# qa.py
import os
import json
import tempfile
from typing import Optional, Tuple

import requests
import streamlit as st

from utils import generate_audio_mp3  # Reuse your existing TTS function

def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Sends a local audio file to Deepgram for STT.
    Returns the transcript text if successful, or raises an error on failure.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # For WAV -> "audio/wav". If the user uploads MP3, you'd use "audio/mpeg".
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }

    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f)
    response.raise_for_status()

    data = response.json()
    # Extract the transcript from Deepgram's response structure
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
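
# The helper below is a sketch, not part of the original flow: it bridges an
# in-memory Streamlit upload (e.g. from st.file_uploader) to
# transcribe_audio_deepgram(), which expects a path on disk. The name
# transcribe_uploaded_file is hypothetical; adapt it to your app.
def transcribe_uploaded_file(uploaded_file) -> str:
    """
    Write an uploaded audio file to a temporary WAV file, run Deepgram STT
    on it, then remove the temporary file.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name
    try:
        return transcribe_audio_deepgram(tmp_path)
    finally:
        os.remove(tmp_path)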

def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Minimal function that calls your LLM (Groq) to answer a follow-up question.
    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
    """
    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.

    Conversation so far:
    {conversation_so_far}

    New user question:
    {user_question}

    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    from utils import call_groq_api_for_qa
    raw_json_response = call_groq_api_for_qa(system_prompt)

    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
    response_dict = json.loads(raw_json_response)
    return response_dict
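
# call_llm_for_qa() assumes the model returns strict JSON, but in practice
# models sometimes wrap the answer in extra prose. A minimal defensive variant
# (a sketch, not in the original) falls back to an empty answer rather than
# crashing the Streamlit app:
def call_llm_for_qa_safe(conversation_so_far: str, user_question: str) -> dict:
    """
    Same contract as call_llm_for_qa, but returns a default dict
    if the model's output is not valid JSON.
    """
    try:
        return call_llm_for_qa(conversation_so_far, user_question)
    except json.JSONDecodeError:
        return {"speaker": "John", "text": ""}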

def handle_qa_exchange(user_question: str) -> Tuple[Optional[bytes], str]:
    """
    1) Read conversation_so_far from session_state
    2) Call the LLM for a short follow-up answer
    3) Generate TTS audio
    4) Return (audio_bytes, answer_text)
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Update conversation
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()
    return (audio_bytes, answer_text)
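
# A minimal sketch of how handle_qa_exchange() could be wired into a Streamlit
# page. render_qa_section is a hypothetical name; call it from your main app
# script, and swap st.text_input for an audio recorder plus STT if you want
# spoken follow-up questions.
def render_qa_section():
    user_question = st.text_input("Ask John a follow-up question:")
    if st.button("Ask") and user_question.strip():
        audio_bytes, answer_text = handle_qa_exchange(user_question)
        st.write(answer_text)
        if audio_bytes:
            st.audio(audio_bytes, format="audio/mp3")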