Spaces:
Running
Running
File size: 2,893 Bytes
70347ba 71678c7 70347ba 37a3329 70347ba 38c419f b0e78f5 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 b0e78f5 38c419f b0e78f5 38c419f 70347ba 37a3329 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# qa.py
import os
import requests
import json
import tempfile
import streamlit as st
from utils import generate_audio_mp3 # Reuse your existing TTS function
def transcribe_audio_deepgram(local_audio_path: str, content_type: str = "audio/wav") -> str:
    """
    Send a local audio file to Deepgram for speech-to-text (STT).

    Args:
        local_audio_path: Path to the audio file on disk.
        content_type: MIME type of the audio payload. Defaults to
            "audio/wav"; pass "audio/mpeg" if the user uploaded an MP3.

    Returns:
        The transcript text, or "" if Deepgram returned no transcript
        (or an unexpectedly-shaped response).

    Raises:
        ValueError: If DEEPGRAM_API_KEY is not set in the environment.
        requests.HTTPError: If Deepgram responds with an error status.
    """
    deepgram_api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not deepgram_api_key:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": content_type,
    }

    # Stream the file straight from disk; the timeout prevents a hung
    # request from blocking the Streamlit app indefinitely.
    with open(local_audio_path, "rb") as audio_file:
        response = requests.post(url, headers=headers, data=audio_file, timeout=120)
    response.raise_for_status()

    data = response.json()
    # Defensive extraction: if the response shape is not what we expect,
    # return "" instead of raising a bare KeyError/IndexError in the UI flow.
    try:
        return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    except (KeyError, IndexError):
        return ""
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the LLM (Groq) to answer a follow-up question in character as "John".

    Args:
        conversation_so_far: The accumulated conversation transcript.
        user_question: The new follow-up question from the user.

    Returns:
        A dict parsed from the model's JSON reply, expected to contain
        the keys "speaker" and "text", e.g. {"speaker": "John", "text": "..."}.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON even
            after stripping markdown code fences.
    """
    system_prompt = f"""
You are John, the guest speaker. The user is asking a follow-up question.
Conversation so far:
{conversation_so_far}
New user question:
{user_question}
Please respond in JSON with keys "speaker" and "text", e.g.:
{{ "speaker": "John", "text": "Sure, here's my answer..." }}
"""
    # Local import kept as in the original — presumably to avoid a circular
    # import with utils; TODO confirm.
    from utils import call_groq_api_for_qa

    raw_json_response = call_groq_api_for_qa(system_prompt)

    # LLMs frequently wrap JSON replies in markdown code fences
    # (```json ... ```); strip them so json.loads doesn't choke.
    cleaned = raw_json_response.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`").strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].lstrip()

    return json.loads(cleaned)
def handle_qa_exchange(user_question: str) -> "tuple[bytes | None, str]":
    """
    Answer a user's follow-up question and synthesize spoken audio for it.

    Steps:
      1) Read the running conversation from Streamlit session state.
      2) Ask the LLM for a short follow-up answer.
      3) Append the Q/A exchange to the stored conversation history.
      4) Generate TTS audio for the answer (always in John's voice).

    Args:
        user_question: The follow-up question entered by the user.

    Returns:
        (audio_bytes, answer_text). Returns (None, "") when the LLM
        produced an empty answer.
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM for a reply in character.
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Persist the exchange (even an empty answer) before the empty-answer
    # check — this matches the original control flow.
    st.session_state["conversation_history"] = (
        conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    )

    if not answer_text.strip():
        return (None, "")

    # TTS: generate_audio_mp3 returns a path to an MP3 on disk; read it
    # back as bytes for the Streamlit audio player.
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)
|