File size: 2,893 Bytes
70347ba
 
 
71678c7
70347ba
37a3329
70347ba
 
38c419f
b0e78f5
71678c7
 
38c419f
 
71678c7
 
 
 
 
 
38c419f
71678c7
 
 
 
38c419f
71678c7
 
 
38c419f
71678c7
38c419f
71678c7
 
b0e78f5
38c419f
 
b0e78f5
38c419f
 
70347ba
 
37a3329
70347ba
 
 
 
 
 
 
 
 
 
38c419f
 
70347ba
38c419f
70347ba
38c419f
 
 
 
 
 
 
 
 
 
 
 
 
 
70347ba
 
 
38c419f
 
 
 
70347ba
 
 
38c419f
 
70347ba
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# qa.py

import os
import requests
import json
import tempfile
import streamlit as st

from utils import generate_audio_mp3  # Reuse your existing TTS function

def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Send a local audio file to Deepgram for speech-to-text.

    Args:
        local_audio_path: Path to the audio file on disk (WAV, MP3, etc.).

    Returns:
        The transcript text; may be empty if Deepgram detected no speech.

    Raises:
        ValueError: If DEEPGRAM_API_KEY is not set in the environment.
        requests.HTTPError: If the Deepgram API responds with an error status.
    """
    import mimetypes  # local import, only needed by this helper

    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # Fix: the original hard-coded "audio/wav", mislabeling MP3 uploads.
    # Guess the MIME type from the file extension; fall back to WAV.
    content_type, _ = mimetypes.guess_type(local_audio_path)
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": content_type or "audio/wav",
    }

    with open(local_audio_path, "rb") as f:
        # Stream the file body; the timeout prevents the Streamlit app from
        # hanging indefinitely on a stalled connection.
        response = requests.post(url, headers=headers, data=f, timeout=60)
    response.raise_for_status()

    data = response.json()
    # Deepgram nests the transcript under results -> channels -> alternatives.
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript


def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the LLM (Groq) to answer a follow-up question as the guest speaker.

    Args:
        conversation_so_far: Transcript of the conversation up to this point.
        user_question: The user's new follow-up question.

    Returns:
        Parsed response dict, e.g. {"speaker": "John", "text": "..."}.

    Raises:
        ValueError: If the LLM reply cannot be parsed as JSON. (Note:
            json.JSONDecodeError is a ValueError subclass, so existing
            callers catching ValueError are unaffected.)
    """
    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.
    Conversation so far:
    {conversation_so_far}

    New user question:
    {user_question}

    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    from utils import call_groq_api_for_qa

    raw_json_response = call_groq_api_for_qa(system_prompt)

    # Fix: LLMs frequently wrap JSON in markdown code fences (```json ... ```),
    # which made the original bare json.loads() crash. Strip fences first.
    cleaned = raw_json_response.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`").strip()
        # Drop an optional leading language tag left after removing the fence.
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()

    try:
        response_dict = json.loads(cleaned)
    except json.JSONDecodeError as err:
        # Surface the raw reply so the failure is debuggable from the logs.
        raise ValueError(f"LLM returned invalid JSON: {raw_json_response!r}") from err
    return response_dict


def handle_qa_exchange(user_question: str) -> "tuple[bytes | None, str]":
    """
    Run one question/answer round in the Streamlit session.

    Steps:
      1) Read conversation_so_far from st.session_state
      2) Call the LLM for a short follow-up answer
      3) Append the exchange to the stored conversation history
      4) Generate TTS audio for the answer

    Args:
        user_question: The follow-up question typed by the user.

    Returns:
        (audio_bytes, answer_text). If the LLM returned an empty/whitespace
        answer, returns (None, "") — callers must handle the None audio.

    Note: the original annotation ``-> (bytes, str)`` was a tuple of types,
    not a valid type hint, and hid the None case fixed above.
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Update conversation history even for empty answers, preserving the
    # original behavior (the exchange is recorded regardless).
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS — generate_audio_mp3 writes an MP3 file and returns its path.
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)