# qa.py — Streamlit Q&A helper: Deepgram STT + Groq LLM + TTS playback
# qa.py
import json
import os
import tempfile
from typing import Optional, Tuple

import requests
import streamlit as st

from utils import generate_audio_mp3 # Reuse your existing TTS function
def transcribe_audio_deepgram(local_audio_path: str, content_type: str = "audio/wav") -> str:
    """
    Send a local audio file to Deepgram for speech-to-text.

    Args:
        local_audio_path: Path to the audio file on disk.
        content_type: MIME type of the audio — "audio/wav" for WAV,
            "audio/mpeg" for MP3. Defaults to "audio/wav" (the original
            hard-coded value, so existing callers are unaffected).

    Returns:
        The transcript text, or "" if Deepgram returned no transcript.

    Raises:
        ValueError: If DEEPGRAM_API_KEY is not set in the environment.
        requests.HTTPError: If the Deepgram API responds with an error status.
    """
    deepgram_api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not deepgram_api_key:
        raise ValueError("Deepgram API key not found in environment variables.")
    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": content_type,
    }
    # Stream the open file handle directly so large recordings are not
    # loaded into memory; timeout prevents a hung request from blocking
    # the Streamlit app forever.
    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f, timeout=300)
    response.raise_for_status()
    data = response.json()
    # Deepgram nests the transcript under results -> channels -> alternatives.
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the Groq LLM to answer a follow-up question in character as "John".

    Args:
        conversation_so_far: Transcript of the conversation so far.
        user_question: The new follow-up question from the user.

    Returns:
        A dict parsed from the LLM's JSON reply, expected to carry the
        keys "speaker" and "text", e.g. {"speaker": "John", "text": "..."}.

    Raises:
        json.JSONDecodeError: If the LLM response is not valid JSON even
            after stripping markdown code fences.
    """
    system_prompt = f"""
You are John, the guest speaker. The user is asking a follow-up question.
Conversation so far:
{conversation_so_far}
New user question:
{user_question}
Please respond in JSON with keys "speaker" and "text", e.g.:
{{ "speaker": "John", "text": "Sure, here's my answer..." }}
"""
    from utils import call_groq_api_for_qa
    raw_json_response = call_groq_api_for_qa(system_prompt)
    # LLMs frequently wrap JSON in ```json ... ``` fences even when asked
    # not to; strip any fences before parsing so a well-formed answer
    # inside them still succeeds.
    cleaned = raw_json_response.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`").strip()
        # Drop an optional language tag ("json") left behind by the fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:].strip()
    response_dict = json.loads(cleaned)
    return response_dict
def handle_qa_exchange(user_question: str) -> Tuple[Optional[bytes], str]:
    """
    Run one question/answer round-trip and synthesize the spoken reply.

    Steps:
        1) Read the running conversation from Streamlit session state.
        2) Ask the LLM for a short follow-up answer.
        3) Append the exchange to the conversation history.
        4) Generate TTS audio for the answer.

    Args:
        user_question: The follow-up question from the user.

    Returns:
        (audio_bytes, answer_text); (None, "") when the LLM produced an
        empty or whitespace-only answer.
    """
    conversation_so_far = st.session_state.get("conversation_history", "")
    # Ask the LLM for the next turn.
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")
    # Persist the exchange (even an empty answer) so later questions see
    # the full context, matching the original ordering of operations.
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history
    if not answer_text.strip():
        return (None, "")
    # Synthesize speech, read it into memory, then delete the temp file so
    # repeated Q&A rounds do not accumulate MP3s on disk.
    audio_file_path = generate_audio_mp3(answer_text, "John")  # voice is always John
    try:
        with open(audio_file_path, "rb") as f:
            audio_bytes = f.read()
    finally:
        try:
            os.remove(audio_file_path)
        except OSError:
            # Best-effort cleanup; a lingering temp file is not fatal.
            pass
    return (audio_bytes, answer_text)