"""Streamlit "20 Questions" game with voice input.

Groq's Llama 3.3-70B asks strategic yes/no questions to guess what the player
is thinking of (person / place / object); OpenAI Whisper transcribes spoken
answers, and Mistral Tiny powers an in-game help chat.
"""

import hashlib
import os
import time
from io import BytesIO

import numpy as np
import requests
import streamlit as st
import torch
import torchaudio
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from streamlit.components.v1 import html
from transformers import pipeline

# SECURITY FIX: the original file shipped live API keys as string literals.
# Those keys must be treated as compromised and rotated. Keys are now read
# from the environment (`load_dotenv` was imported but never called before).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")

# Bounded timeout (seconds) for all outbound API calls so a hung request
# cannot freeze the Streamlit script run.
API_TIMEOUT = 30

######################################
# Voice Input Helper Functions
######################################


@st.cache_resource
def load_voice_model():
    """Load and cache the Whisper ASR pipeline (handles English and Urdu)."""
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")


def process_audio(audio_bytes):
    """Decode raw audio bytes into the mono 16 kHz dict Whisper expects.

    Args:
        audio_bytes: encoded audio as returned by the recorder widget.

    Returns:
        ``{"raw": 1-D float numpy array, "sampling_rate": 16000}``.
    """
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    if waveform.shape[0] > 1:
        # Collapse stereo (or more channels) to mono by averaging.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    if sample_rate != 16000:
        # Whisper requires 16 kHz input.
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=16000
        )
        waveform = resampler(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}


def get_voice_transcription(state_key):
    """Show an audio recorder and transcribe new recordings into session state.

    The MD5 of the last-processed clip is kept under
    ``state_key + "_last_hash"`` so Streamlit reruns don't re-transcribe the
    same audio. Returns the accumulated transcribed text for ``state_key``.
    """
    if state_key not in st.session_state:
        st.session_state[state_key] = ""
    # Unique widget key per logical input field.
    audio_bytes = audio_recorder(
        key=state_key + "_audio",
        pause_threshold=0.8,
        text="Speak to type",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
    )
    if audio_bytes:
        current_hash = hashlib.md5(audio_bytes).hexdigest()
        last_hash_key = state_key + "_last_hash"
        if st.session_state.get(last_hash_key, "") != current_hash:
            st.session_state[last_hash_key] = current_hash
            try:
                audio_input = process_audio(audio_bytes)
                whisper = load_voice_model()
                transcribed_text = whisper(audio_input)["text"]
                st.info(f"🎙️ Transcribed: {transcribed_text}")
                # FIX: the original did `+= (" " + text).strip()`, which strips
                # the separator and glues successive transcriptions together.
                existing = st.session_state[state_key]
                st.session_state[state_key] = (
                    existing + " " + transcribed_text
                ).strip()
                st.experimental_rerun()
            except Exception as e:
                # Best-effort: surface the error but keep the app running.
                st.error(f"Voice input error: {str(e)}")
    return st.session_state[state_key]


######################################
# Existing Game Helper Functions
######################################


@st.cache_resource
def get_help_agent():
    """Cache a BlenderBot 400M Distill conversational pipeline (help agent)."""
    from transformers import pipeline

    return pipeline("conversational", model="facebook/blenderbot-400M-distill")


def inject_custom_css():
    """Inject the app's custom CSS.

    NOTE(review): the CSS payload was stripped from the copy under review;
    restore it from the canonical version.
    """
    st.markdown("""
    """, unsafe_allow_html=True)


def show_confetti():
    """Render the confetti celebration snippet.

    NOTE(review): the HTML/JS payload was stripped from the copy under
    review; restore it from the canonical version.
    """
    html("""
    """)


def ask_llama(conversation_history, category, is_final_guess=False):
    """Ask Groq's Llama 3.3-70B for the next yes/no question or a final guess.

    Args:
        conversation_history: list of ``{"role", "content"}`` chat messages.
        category: "person", "place", or "object".
        is_final_guess: when True, request only the model's final answer.

    Returns:
        The model's text, or "Could not generate question" on API failure
        (error is shown in the UI rather than raised).
    """
    api_url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        # Key now comes from the environment — never hard-code secrets.
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    system_prompt = f"""You're playing 20 questions to guess a {category}. Follow these rules:
1. Ask strategic, non-repeating yes/no questions that narrow down possibilities
2. Consider all previous answers carefully before asking next question
3. If you're very confident (80%+ sure), respond with "Final Guess: [your guess]"
4. For places: ask about continent, climate, famous landmarks, country, city or population
5. For people: ask about fictional or real, profession, gender, alive/dead, nationality, or fame
6. For objects: ask about size, color, usage, material, or where it's found
7. Never repeat questions and always make progress toward guessing"""

    if is_final_guess:
        prompt = f"""Based on these answers about a {category}, provide ONLY your final guess with no extra text: {conversation_history}"""
    else:
        prompt = "Ask your next strategic yes/no question that will best narrow down the possibilities."

    messages = [
        {"role": "system", "content": system_prompt},
        *conversation_history,
        {"role": "user", "content": prompt},
    ]
    data = {
        "model": "llama-3.3-70b-versatile",
        "messages": messages,
        # Slightly lower temperature for the final guess (more deterministic).
        "temperature": 0.7 if is_final_guess else 0.8,
        "max_tokens": 100,
    }
    try:
        response = requests.post(
            api_url, headers=headers, json=data, timeout=API_TIMEOUT
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"Error calling Llama API: {str(e)}")
        return "Could not generate question"


def ask_help_agent(query):
    """Answer a player's help query via Mistral Tiny, replaying chat history.

    Returns the assistant's reply, or an error string on failure — help is
    best-effort, so errors are returned rather than raised.
    """
    try:
        url = "https://api.mistral.ai/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json",
        }
        messages = [{"role": "system", "content": "You are a friendly Chatbot."}]
        # Replay prior help-chat turns so the model keeps conversational context.
        for msg in st.session_state.get("help_conversation", []):
            if msg.get("query"):
                messages.append({"role": "user", "content": msg["query"]})
            if msg.get("response"):
                messages.append({"role": "assistant", "content": msg["response"]})
        messages.append({"role": "user", "content": query})
        payload = {
            "model": "mistral-tiny",
            "messages": messages,
            "temperature": 0.7,
            "top_p": 0.95,
        }
        response = requests.post(
            url, headers=headers, json=payload, timeout=API_TIMEOUT
        )
        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]
        return f"API Error {response.status_code}: {response.text}"
    except Exception as e:
        return f"Error in help agent: {str(e)}"


def show_techniques_modal():
    """Show an expander describing the AI models used and known limitations."""
    with st.expander("ℹ️ Project Techniques & Limitations", expanded=True):
        st.markdown("""
        **AI Models Used:**
        1. Groq Llama 3.3-70B - For generating strategic questions and final guesses
        2. Mistral Tiny - Powers the help chat assistant
        3. OpenAI Whisper - Converts speech to text in real-time
        4. Hard Prompt Tuning - Carefully engineered prompts to optimize model performance

        **Known Limitations:**
        1. Voice input may take 5-10 seconds to process sentences, which is fine.
        2. Single words (like "yes", "object") may take 10-20 minutes, which is irritating.
        3. Language Support - While Whisper understands and writes Urdu, but the game only supports English responses
        """)
        if st.button("Close", key="modal_close_btn"):
            pass  # The expander will automatically close


######################################
# Main Game Logic with Voice Integration
######################################


def main():
    """Run the Streamlit 20-questions game.

    NOTE(review): the original body of this function was corrupted in the
    copy under review (HTML strings and parts of the control flow were
    stripped). The flow below is reconstructed from the surviving fragments —
    welcome text, category list, question display, final-guess confirmation,
    "Guessed in N questions", and the Play Again button — and must be
    confirmed against the canonical version before shipping.
    """
    inject_custom_css()

    with st.sidebar:
        if st.button("About this project"):
            show_techniques_modal()

    # --- one-time session initialisation --------------------------------
    if "game_state" not in st.session_state:
        st.session_state.game_state = "start"
        st.session_state.questions = []
        st.session_state.conversation_history = []
        st.session_state.category = None
        st.session_state.final_guess = None

    # --- welcome / category selection ------------------------------------
    if st.session_state.game_state == "start":
        st.markdown(
            "Think of something and I'll try to guess it in 20 questions or less!\n\n"
            "Choose from these categories:\n"
            "- **person** — celebrity, fictional character, historical figure\n"
            "- **place** — city, country, landmark, geographical location\n"
            "- **object** — everyday item, tool, vehicle"
        )
        category = st.text_input("Enter category (person / place / object):").strip().lower()
        if st.button("Start Game") and category in ("person", "place", "object"):
            st.session_state.category = category
            first_question = ask_llama([], category)
            st.session_state.questions = [first_question]
            st.session_state.conversation_history = [
                {"role": "assistant", "content": first_question}
            ]
            st.session_state.game_state = "gameplay"
            st.experimental_rerun()

    # --- question/answer loop --------------------------------------------
    elif st.session_state.game_state == "gameplay":
        current_question = st.session_state.questions[-1]
        st.markdown(
            f"**Question {len(st.session_state.questions)}:** {current_question}"
        )

        # Answers can be spoken (transcribed) or typed.
        voice_answer = get_voice_transcription("answer_input")
        answer = st.text_input("Your answer (yes/no):", value=voice_answer).strip().lower()

        if st.button("Submit Answer") and answer:
            st.session_state.conversation_history.append(
                {"role": "user", "content": answer}
            )
            if len(st.session_state.questions) >= 20:
                # Out of questions — force a final guess.
                st.session_state.game_state = "confirm_guess"
            else:
                next_question = ask_llama(
                    st.session_state.conversation_history, st.session_state.category
                )
                if next_question.lower().startswith("final guess:"):
                    st.session_state.final_guess = next_question.split(":", 1)[1].strip()
                    st.session_state.game_state = "confirm_guess"
                else:
                    st.session_state.questions.append(next_question)
                    st.session_state.conversation_history.append(
                        {"role": "assistant", "content": next_question}
                    )
            st.experimental_rerun()

        # --- help chat assistant ------------------------------------------
        with st.expander("Need help?"):
            help_query = st.text_input("Ask the help assistant:", key="help_query")
            if st.button("Ask", key="help_ask") and help_query:
                response = ask_help_agent(help_query)
                st.session_state.setdefault("help_conversation", []).append(
                    {"query": help_query, "response": response}
                )
            for msg in st.session_state.get("help_conversation", []):
                st.markdown(f"**You:** {msg['query']}")
                st.markdown(f"**Assistant:** {msg['response']}")

    # --- final guess confirmation -----------------------------------------
    elif st.session_state.game_state == "confirm_guess":
        if st.session_state.final_guess is None:
            st.session_state.final_guess = ask_llama(
                st.session_state.conversation_history,
                st.session_state.category,
                is_final_guess=True,
            )
        st.markdown(f"Is it **{st.session_state.final_guess}**?")
        col_yes, col_no = st.columns(2)
        if col_yes.button("Yes"):
            st.session_state.game_state = "win"
            st.experimental_rerun()
        if col_no.button("No"):
            st.session_state.game_state = "lose"
            st.experimental_rerun()

    # --- end screen --------------------------------------------------------
    elif st.session_state.game_state in ("win", "lose"):
        if st.session_state.game_state == "win":
            show_confetti()
            st.markdown(
                f"Guessed in {len(st.session_state.questions)} questions",
                unsafe_allow_html=True,
            )
        else:
            st.markdown("You win — I couldn't guess it!")
        if st.button("Play Again", key="play_again"):
            st.session_state.clear()
            st.experimental_rerun()


if __name__ == "__main__":
    main()