# qa.py
import os
import json
import tempfile
from typing import Optional, Tuple

import requests
import streamlit as st

from utils import generate_audio_mp3  # Reuse your existing TTS function

def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Sends a local audio file to Deepgram for STT.
    Returns the transcript text if successful, or raises an error on failure.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # For WAV -> "audio/wav". If the user uploads MP3, you'd use "audio/mpeg".
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }

    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f)
    response.raise_for_status()

    data = response.json()
    # Extract the transcript from Deepgram's response structure
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
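
# The helper below is a sketch, not part of the original flow: it bridges an
# in-memory Streamlit upload (e.g. from st.file_uploader) to
# transcribe_audio_deepgram(), which expects a path on disk. The name
# transcribe_uploaded_file is hypothetical; adapt it to your app.
def transcribe_uploaded_file(uploaded_file) -> str:
    """
    Write an uploaded audio file to a temporary WAV file, run Deepgram STT
    on it, then remove the temporary file.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name
    try:
        return transcribe_audio_deepgram(tmp_path)
    finally:
        os.remove(tmp_path)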

def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Minimal function that calls your LLM (Groq) to answer a follow-up question.
    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
    """
    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.

    Conversation so far:
    {conversation_so_far}

    New user question:
    {user_question}

    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    from utils import call_groq_api_for_qa
    raw_json_response = call_groq_api_for_qa(system_prompt)

    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
    response_dict = json.loads(raw_json_response)
    return response_dict
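
# call_llm_for_qa() assumes the model returns strict JSON, but in practice
# models sometimes wrap the answer in extra prose. A minimal defensive variant
# (a sketch, not in the original) falls back to an empty answer rather than
# crashing the Streamlit app:
def call_llm_for_qa_safe(conversation_so_far: str, user_question: str) -> dict:
    """
    Same contract as call_llm_for_qa, but returns a default dict
    if the model's output is not valid JSON.
    """
    try:
        return call_llm_for_qa(conversation_so_far, user_question)
    except json.JSONDecodeError:
        return {"speaker": "John", "text": ""}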

def handle_qa_exchange(user_question: str) -> Tuple[Optional[bytes], str]:
    """
    1) Read conversation_so_far from session_state
    2) Call the LLM for a short follow-up answer
    3) Generate TTS audio
    4) Return (audio_bytes, answer_text)
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Update conversation
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()
    return (audio_bytes, answer_text)
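
# A minimal sketch of how handle_qa_exchange() could be wired into a Streamlit
# page. render_qa_section is a hypothetical name; call it from your main app
# script, and swap st.text_input for an audio recorder plus STT if you want
# spoken follow-up questions.
def render_qa_section():
    user_question = st.text_input("Ask John a follow-up question:")
    if st.button("Ask") and user_question.strip():
        audio_bytes, answer_text = handle_qa_exchange(user_question)
        st.write(answer_text)
        if audio_bytes:
            st.audio(audio_bytes, format="audio/mp3")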