"""KASOTI: a Streamlit "20 questions" guessing game with optional voice input.

The player thinks of a person, place, or object; a Groq-hosted Llama model
asks yes/no questions and eventually guesses.  A Mistral-backed help chat and
a Whisper speech-to-text recorder are available alongside the text inputs.

Configuration: the API keys are read from the environment (optionally via a
``.env`` file) as ``GROQ_API_KEY`` and ``MISTRAL_API_KEY``.
"""
import hashlib
import os
import time
from html import escape
from io import BytesIO

import numpy as np
import requests
import streamlit as st
import torch
import torchaudio
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from streamlit.components.v1 import html
from transformers import pipeline

# Pull variables from a local .env file (if any) before reading the keys below.
load_dotenv()

GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
MISTRAL_API_URL = "https://api.mistral.ai/v1/chat/completions"

# Secrets must never be committed to source; they come from the environment.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")

# Without a timeout a stalled API call would block the Streamlit script forever.
REQUEST_TIMEOUT_SECONDS = 30

MAX_QUESTIONS = 20
VALID_CATEGORIES = ("person", "place", "object")
VALID_ANSWERS = ("yes", "no", "both")
FINAL_GUESS_MARKER = "Final Guess:"
TARGET_SAMPLE_RATE = 16000

# Kept at column 0 on purpose: indented lines would render as Markdown code.
_SYSTEM_PROMPT_TEMPLATE = """You're playing 20 questions to guess a {category}. Follow these rules:
1. Ask strategic, non-repeating yes/no questions that narrow down possibilities
2. Consider all previous answers carefully before asking next question
3. If you're very confident (80%+ sure), respond with "Final Guess: [your guess]"
4. For places: ask about continent, climate, famous landmarks, country, city or population
5. For people: ask about fictional or real, profession, gender, alive/dead, nationality, or fame
6. For objects: ask about size, color, usage, material, or where it's found
7. Never repeat questions and always make progress toward guessing"""

_TECHNIQUES_MARKDOWN = """
**AI Models Used:**
1. Groq Llama 3.3-70B - For generating strategic questions and final guesses
2. Mistral Tiny - Powers the help chat assistant
3. OpenAI Whisper - Converts speech to text in real-time
4. Hard Prompt Tuning - Carefully engineered prompts to optimize model performance

**Known Limitations:**
1. Voice input may take 5-10 seconds to process sentences, which is fine.
2. Single words (like "yes", "object") may take 10-20 minutes, which is irritating.
3. Language Support - While Whisper understands and writes Urdu, but the game only supports English responses
"""

# NOTE(review): the HTML wrappers around this text (and around the other
# st.markdown strings below) appear to have been lost; only the text survives.
# Restore the markup from the upstream copy if one exists.
_WELCOME_MARKDOWN = """

🎮 Welcome to KASOTI

Think of something and I'll try to guess it in 20 questions or less!
Choose from these categories:

🧑 Person

Celebrity, fictional character, historical figure

🌍 Place

City, country, landmark, geographical location

🎯 Object

Everyday item, tool, vehicle

"""


def _rerun():
    """Restart the script run using whichever rerun API this Streamlit offers.

    ``st.experimental_rerun`` was superseded by ``st.rerun`` and is absent in
    current releases; older releases only have the experimental name.
    """
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


######################################
# Voice Input Helper Functions
######################################
@st.cache_resource
def load_voice_model():
    """Return the cached Whisper (``openai/whisper-base``) speech-to-text pipeline.

    Per the original author's note the model auto-detects English and Urdu.
    """
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")


def process_audio(audio_bytes: bytes) -> dict:
    """Decode recorded audio into the mono 16 kHz input the ASR pipeline takes.

    Args:
        audio_bytes: Encoded audio as produced by the recorder widget.

    Returns:
        A dict with ``"raw"`` (NumPy waveform) and ``"sampling_rate"`` (16000).
    """
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    if waveform.shape[0] > 1:
        # Average the channels to convert stereo (or more) to mono.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    if sample_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
        )
        waveform = resampler(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": TARGET_SAMPLE_RATE}


def get_voice_transcription(state_key):
    """Show an audio recorder and return the transcription stored for a key.

    When a new recording arrives it is transcribed, appended to
    ``st.session_state[state_key]`` (space-separated), and the script is rerun
    so dependent inputs pick up the new text.

    Args:
        state_key: Session-state key holding the accumulated transcription.

    Returns:
        The current transcription text for ``state_key`` (possibly empty).
    """
    if state_key not in st.session_state:
        st.session_state[state_key] = ""

    # The recorder widget needs its own key, distinct from the text state key.
    audio_bytes = audio_recorder(
        key=state_key + "_audio",
        pause_threshold=0.8,
        text="Speak to type",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
    )
    if not audio_bytes:
        return st.session_state[state_key]

    # The widget returns the same bytes on every rerun; hash them so each
    # recording is transcribed only once (not a security use of MD5).
    current_hash = hashlib.md5(audio_bytes).hexdigest()
    last_hash_key = state_key + "_last_hash"
    if st.session_state.get(last_hash_key, "") == current_hash:
        return st.session_state[state_key]
    st.session_state[last_hash_key] = current_hash

    try:
        audio_input = process_audio(audio_bytes)
        transcribed_text = load_voice_model()(audio_input)["text"]
    except Exception as e:  # UI boundary: decoding/model errors are varied.
        st.error(f"Voice input error: {str(e)}")
    else:
        st.info(f"📝 Transcribed: {transcribed_text}")
        # Strip the combined text, not just the addition, so the separating
        # space between the old and new transcription is preserved.
        st.session_state[state_key] = (
            f"{st.session_state[state_key]} {transcribed_text}".strip()
        )
        _rerun()
    return st.session_state[state_key]


######################################
# Existing Game Helper Functions
######################################
@st.cache_resource
def get_help_agent():
    """Return a cached BlenderBot 400M Distill conversational pipeline.

    Not called anywhere in this file; the original comment says it is used
    elsewhere.
    """
    return pipeline("conversational", model="facebook/blenderbot-400M-distill")


def inject_custom_css():
    """Inject the app's custom styling.

    NOTE(review): the style payload is blank in this copy of the file.
    """
    st.markdown(""" """, unsafe_allow_html=True)


def show_confetti():
    """Render the confetti component.

    NOTE(review): the component markup is blank in this copy of the file.
    """
    html(""" """)


def ask_llama(conversation_history, category, is_final_guess=False):
    """Ask the Groq-hosted Llama model for the next question or a final guess.

    Args:
        conversation_history: Chat messages (``{"role", "content"}`` dicts)
            exchanged so far.
        category: The category being guessed ("person", "place" or "object").
        is_final_guess: When true, request only a final guess.

    Returns:
        The model's reply text, or ``"Could not generate question"`` when the
        call fails (an error is also shown in the UI).
    """
    fallback = "Could not generate question"
    if not GROQ_API_KEY:
        st.error("Error calling Llama API: GROQ_API_KEY is not set")
        return fallback

    if is_final_guess:
        prompt = (
            f"Based on these answers about a {category}, provide ONLY your "
            f"final guess with no extra text: {conversation_history}"
        )
    else:
        prompt = (
            "Ask your next strategic yes/no question that will best narrow "
            "down the possibilities."
        )

    data = {
        "model": "llama-3.3-70b-versatile",
        "messages": [
            {
                "role": "system",
                "content": _SYSTEM_PROMPT_TEMPLATE.format(category=category),
            },
            *conversation_history,
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7 if is_final_guess else 0.8,
        "max_tokens": 100,
    }
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            GROQ_API_URL, headers=headers, json=data, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        st.error(f"Error calling Llama API: {str(e)}")
        return fallback


def ask_help_agent(query):
    """Send a help-chat query to the Mistral API, including prior chat turns.

    Args:
        query: The user's new help question.

    Returns:
        The assistant's reply, or a human-readable error string on failure.
    """
    if not MISTRAL_API_KEY:
        return "Error in help agent: MISTRAL_API_KEY is not set"

    messages = [{"role": "system", "content": "You are a friendly Chatbot."}]
    # Replay earlier turns so the assistant keeps conversational context.
    for msg in st.session_state.get("help_conversation", []):
        if msg.get("query"):
            messages.append({"role": "user", "content": msg["query"]})
        if msg.get("response"):
            messages.append({"role": "assistant", "content": msg["response"]})
    messages.append({"role": "user", "content": query})

    payload = {
        "model": "mistral-tiny",
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
    }
    headers = {
        "Authorization": f"Bearer {MISTRAL_API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            MISTRAL_API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return f"API Error {response.status_code}: {response.text}"
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        return f"Error in help agent: {str(e)}"


def show_techniques_modal():
    """Show the project's techniques and known limitations in an expander."""
    with st.expander("ℹ️ Project Techniques & Limitations", expanded=True):
        st.markdown(_TECHNIQUES_MARKDOWN)
        # Clicking triggers a rerun, after which the info button is no longer
        # "pressed" and the expander is not rendered; no handler is needed.
        st.button("Close", key="modal_close_btn")


######################################
# Main Game Logic with Voice Integration
######################################
def _init_session_state():
    """Create the game's session-state fields on the first run of a session."""
    if "game_state" in st.session_state:
        return
    st.session_state.game_state = "start"
    st.session_state.questions = []
    st.session_state.current_q = 0
    st.session_state.answers = []
    st.session_state.conversation_history = []
    st.session_state.category = None
    st.session_state.final_guess = None
    st.session_state.help_conversation = []  # separate history for help agent


def _queue_next_question(question):
    """Record the model's next question and advance the question counter.

    Switches to the result screen once the question limit is reached.
    """
    st.session_state.questions.append(question)
    st.session_state.conversation_history.append(
        {"role": "assistant", "content": question}
    )
    st.session_state.current_q += 1
    if st.session_state.current_q >= MAX_QUESTIONS:
        st.session_state.game_state = "result"


def _render_start_screen():
    """Render the welcome screen and start the game on a valid category."""
    with st.container():
        st.markdown(_WELCOME_MARKDOWN, unsafe_allow_html=True)

        with st.form("start_form"):
            st.markdown("#### Use Voice (English/Urdu) for Category Input")
            voice_category = get_voice_transcription("voice_category")

            # The text input defaults to whatever was spoken.
            category_input = st.text_input(
                "Enter category (person/place/object):",
                value=voice_category.strip(),
                key="category_input",
            ).strip().lower()

            if not st.form_submit_button("Start Game"):
                return
            if not category_input:
                st.error("Please enter a category!")
                return
            if category_input not in VALID_CATEGORIES:
                st.error("Please enter either 'person', 'place', or 'object'!")
                return

            st.session_state.category = category_input
            first_question = ask_llama(
                [{"role": "user", "content": "Ask your first strategic yes/no question."}],
                category_input,
            )
            st.session_state.questions = [first_question]
            st.session_state.conversation_history = [
                {"role": "assistant", "content": first_question}
            ]
            st.session_state.game_state = "gameplay"
            _rerun()


def _handle_answer(answer):
    """Validate a submitted answer, fetch the model's next move, and rerun."""
    if answer not in VALID_ANSWERS:
        st.error("Please answer with 'yes', 'no', or 'both'!")
        return

    st.session_state.answers.append(answer)
    st.session_state.conversation_history.append({"role": "user", "content": answer})
    # Drop the spoken text so it does not pre-fill the next question's input.
    st.session_state["voice_answer"] = ""

    next_response = ask_llama(
        st.session_state.conversation_history, st.session_state.category
    )
    if FINAL_GUESS_MARKER in next_response:
        st.session_state.final_guess = next_response.split(FINAL_GUESS_MARKER)[1].strip()
        st.session_state.game_state = "confirm_guess"
    else:
        _queue_next_question(next_response)
    _rerun()


def _render_help_chat():
    """Render the collapsible help-assistant chat below the answer form."""
    with st.expander("Need Help? Chat with AI Assistant"):
        st.markdown("#### Use Voice (English/Urdu) for Help Query")
        voice_help = get_voice_transcription("voice_help")
        help_query = st.text_input(
            "Enter your help query:", value=voice_help.strip(), key="help_query"
        )

        if st.button("Send", key="send_help"):
            if help_query:
                help_response = ask_help_agent(help_query)
                st.session_state.help_conversation.append(
                    {"query": help_query, "response": help_response}
                )
            else:
                st.error("Please enter a query!")

        for msg in st.session_state.help_conversation:
            st.markdown(f"**You:** {msg['query']}")
            st.markdown(f"**Help Assistant:** {msg['response']}")


def _render_gameplay_screen():
    """Render the current question, the answer form, and the help chat."""
    with st.container():
        question_number = st.session_state.current_q + 1
        # Clamp: st.progress rejects values above 1.0.
        st.progress(min(question_number / MAX_QUESTIONS, 1.0))
        st.markdown(
            f"\nQUESTION {question_number} OF {MAX_QUESTIONS}\n",
            unsafe_allow_html=True,
        )

        current_question = st.session_state.questions[st.session_state.current_q]
        # Model output is escaped because the string is rendered as raw HTML.
        st.markdown(
            f"\n\nAI Question\n\n{escape(current_question, quote=False)}\n\n",
            unsafe_allow_html=True,
        )

        # A queued "question" may actually be a guess (e.g. after a rejected
        # guess); route it to the confirmation screen.
        if FINAL_GUESS_MARKER in current_question:
            st.session_state.final_guess = (
                current_question.split(FINAL_GUESS_MARKER)[1].strip()
            )
            st.session_state.game_state = "confirm_guess"
            _rerun()

        with st.form("answer_form"):
            st.markdown("#### Use Voice (English/Urdu) for Your Answer")
            voice_answer = get_voice_transcription("voice_answer")
            answer_input = st.text_input(
                "Your answer (yes/no/both):",
                value=voice_answer.strip(),
                key=f"answer_{st.session_state.current_q}",
            ).strip().lower()

            if st.form_submit_button("Submit"):
                _handle_answer(answer_input)

        _render_help_chat()


def _render_confirm_screen():
    """Ask the player to confirm or reject the model's final guess."""
    guess_text = escape(str(st.session_state.final_guess), quote=False)
    st.markdown(
        f"\n\nAI's Final Guess\n\nIs it {guess_text}?\n\n",
        unsafe_allow_html=True,
    )

    with st.form("confirm_form"):
        confirm_input = st.text_input(
            "Type your answer (yes/no/both):", key="confirm_input"
        ).strip().lower()

        if not st.form_submit_button("Submit"):
            return
        if confirm_input not in VALID_ANSWERS:
            st.error("Please answer with 'yes', 'no', or 'both'!")
            return

        if confirm_input == "yes":
            st.session_state.game_state = "result"
            _rerun()
            return

        # Anything other than "yes" is a rejection: tell the model, forget the
        # wrong guess so it is never shown as the result, and keep playing.
        st.session_state.conversation_history.append({"role": "user", "content": "no"})
        st.session_state.final_guess = None
        st.session_state.game_state = "gameplay"
        next_response = ask_llama(
            st.session_state.conversation_history, st.session_state.category
        )
        _queue_next_question(next_response)
        _rerun()


def _render_result_screen():
    """Show the final guess (requesting one if none is pending) and a replay button."""
    if not st.session_state.final_guess:
        qa_pairs = zip(st.session_state.questions, st.session_state.answers)
        qa_history = "\n".join(
            f"Q{number}: {question}\nA: {answer}"
            for number, (question, answer) in enumerate(qa_pairs, start=1)
        )
        final_guess = ask_llama(
            [{"role": "user", "content": qa_history}],
            st.session_state.category,
            is_final_guess=True,
        )
        st.session_state.final_guess = final_guess.split(FINAL_GUESS_MARKER)[-1].strip()

    show_confetti()
    st.markdown("\n🎉 It's...\n", unsafe_allow_html=True)
    time.sleep(1)  # brief pause before the reveal
    st.markdown(
        f"\n{escape(str(st.session_state.final_guess), quote=False)}\n",
        unsafe_allow_html=True,
    )
    st.markdown(
        f"\n\nGuessed in {len(st.session_state.questions)} questions\n\n",
        unsafe_allow_html=True,
    )

    if st.button("Play Again", key="play_again"):
        st.session_state.clear()
        _rerun()


def main():
    """Render the screen that matches the current game state."""
    inject_custom_css()
    st.markdown("\nKASOTI\n", unsafe_allow_html=True)
    st.markdown("\nAI-Powered Guessing Game Challenge\n", unsafe_allow_html=True)

    if st.button("ℹ️ Project Techniques & Limitations", key="info_btn"):
        show_techniques_modal()

    _init_session_state()

    game_state = st.session_state.game_state
    if game_state == "start":
        _render_start_screen()
    elif game_state == "gameplay":
        _render_gameplay_screen()
    elif game_state == "confirm_guess":
        _render_confirm_screen()
    elif game_state == "result":
        _render_result_screen()


if __name__ == "__main__":
    main()