Spaces:
Running
Running
File size: 2,893 Bytes
70347ba 71678c7 70347ba 37a3329 70347ba 38c419f b0e78f5 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 38c419f 71678c7 b0e78f5 38c419f b0e78f5 38c419f 70347ba 37a3329 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba 38c419f 70347ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# qa.py
import os
import requests
import json
import tempfile
import streamlit as st
from utils import generate_audio_mp3 # Reuse your existing TTS function
def transcribe_audio_deepgram(local_audio_path: str, content_type: str = "audio/wav") -> str:
    """
    Send a local audio file to Deepgram for speech-to-text (STT).

    Args:
        local_audio_path: Path to the audio file on disk.
        content_type: MIME type of the audio payload. Defaults to
            "audio/wav"; pass "audio/mpeg" if the user uploaded an MP3.

    Returns:
        The transcript text, or "" if Deepgram returned no transcript
        (or an unexpectedly-shaped response).

    Raises:
        ValueError: If DEEPGRAM_API_KEY is not set in the environment.
        requests.HTTPError: If Deepgram responds with an error status.
    """
    deepgram_api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not deepgram_api_key:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": content_type,
    }

    # Stream the file straight from disk; the timeout prevents a hung
    # request from blocking the Streamlit app indefinitely.
    with open(local_audio_path, "rb") as audio_file:
        response = requests.post(url, headers=headers, data=audio_file, timeout=120)
    response.raise_for_status()

    data = response.json()
    # Defensive extraction: if the response shape is not what we expect,
    # return "" instead of raising a bare KeyError/IndexError in the UI flow.
    try:
        return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    except (KeyError, IndexError):
        return ""
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the LLM (Groq) to answer a follow-up question in character as "John".

    Args:
        conversation_so_far: The accumulated conversation transcript.
        user_question: The new follow-up question from the user.

    Returns:
        A dict parsed from the model's JSON reply, expected to contain
        the keys "speaker" and "text", e.g. {"speaker": "John", "text": "..."}.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON even
            after stripping markdown code fences.
    """
    system_prompt = f"""
You are John, the guest speaker. The user is asking a follow-up question.
Conversation so far:
{conversation_so_far}
New user question:
{user_question}
Please respond in JSON with keys "speaker" and "text", e.g.:
{{ "speaker": "John", "text": "Sure, here's my answer..." }}
"""
    # Local import kept as in the original — presumably to avoid a circular
    # import with utils; TODO confirm.
    from utils import call_groq_api_for_qa

    raw_json_response = call_groq_api_for_qa(system_prompt)

    # LLMs frequently wrap JSON replies in markdown code fences
    # (```json ... ```); strip them so json.loads doesn't choke.
    cleaned = raw_json_response.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`").strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].lstrip()

    return json.loads(cleaned)
def handle_qa_exchange(user_question: str) -> "tuple[bytes | None, str]":
    """
    Answer a user's follow-up question and synthesize spoken audio for it.

    Steps:
      1) Read the running conversation from Streamlit session state.
      2) Ask the LLM for a short follow-up answer.
      3) Append the Q/A exchange to the stored conversation history.
      4) Generate TTS audio for the answer (always in John's voice).

    Args:
        user_question: The follow-up question entered by the user.

    Returns:
        (audio_bytes, answer_text). Returns (None, "") when the LLM
        produced an empty answer.
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM for a reply in character.
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Persist the exchange (even an empty answer) before the empty-answer
    # check — this matches the original control flow.
    st.session_state["conversation_history"] = (
        conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    )

    if not answer_text.strip():
        return (None, "")

    # TTS: generate_audio_mp3 returns a path to an MP3 on disk; read it
    # back as bytes for the Streamlit audio player.
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)
|