iisadia committed on
Commit
5e13a93
·
verified ·
1 Parent(s): 98d16e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -132
app.py CHANGED
@@ -1,139 +1,195 @@
1
- import gradio as gr
2
- import speech_recognition as sr
3
- from time import time
4
- import threading
5
- from pydub import AudioSegment
6
- from pydub.playback import play
7
- import io
8
-
9
- # Global variables
10
- is_recording = False
11
- start_beep = AudioSegment.silent(duration=200).append(AudioSegment.from_wav(io.BytesIO(b''), crossfade=100)
12
- end_beep = AudioSegment.silent(duration=200).append(AudioSegment.from_wav(io.BytesIO(b'')), crossfade=100)
13
-
14
- def play_start_sound():
15
- try:
16
- play(start_beep)
17
- except:
18
- pass
19
 
20
- def play_end_sound():
21
- try:
22
- play(end_beep)
23
- except:
24
- pass
25
-
26
- def start_recording(audio_time_limit):
27
- global is_recording
28
- is_recording = True
29
- recognizer = sr.Recognizer()
30
- microphone = sr.Microphone()
31
-
32
- play_start_sound()
33
-
34
- with microphone as source:
35
- recognizer.adjust_for_ambient_noise(source, duration=0.5)
36
- try:
37
- audio = recognizer.listen(source, timeout=3, phrase_time_limit=audio_time_limit)
38
- text = recognizer.recognize_google(audio)
39
- return text
40
- except sr.WaitTimeoutError:
41
- return ""
42
- except sr.UnknownValueError:
43
- return ""
44
- except Exception as e:
45
- print(f"Error: {str(e)}")
46
- return ""
47
- finally:
48
- play_end_sound()
49
- is_recording = False
50
-
51
- def transcribe_audio(audio_time_limit=10):
52
- def execute_recording():
53
- nonlocal result
54
- result = start_recording(audio_time_limit)
55
-
56
- result = ""
57
- recording_thread = threading.Thread(target=execute_recording)
58
- recording_thread.start()
59
-
60
- start_time = time()
61
- while is_recording and (time() - start_time) < audio_time_limit:
62
- time_elapsed = time() - start_time
63
- time_left = max(0, audio_time_limit - time_elapsed)
64
- progress = 1 - (time_left / audio_time_limit)
65
- yield {"__type__": "update", "value": f"๐ŸŽค Recording... {time_left:.1f}s left", "visible": True}, {"__type__": "update", "value": "", "visible": True}
66
- gr.sleep(0.1)
67
-
68
- recording_thread.join()
69
- yield {"__type__": "update", "value": "โœ… Done!", "visible": True}, {"__type__": "update", "value": result, "visible": True}
70
-
71
- def create_ui():
72
- css = """
73
- .mic-button {
74
- background: linear-gradient(45deg, #FF3366, #BA265D) !important;
75
- border: none !important;
76
- color: white !important;
77
- padding: 12px !important;
78
- border-radius: 50% !important;
79
- height: 50px !important;
80
- width: 50px !important;
81
- margin-left: 10px !important;
82
- }
83
- .mic-button:hover {
84
- transform: scale(1.05) !important;
85
- }
86
- .input-with-mic {
87
- display: flex !important;
88
- align-items: center !important;
89
- gap: 10px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }
91
- .status-message {
92
- font-style: italic;
93
- color: #666;
94
- margin-top: 5px;
 
 
 
 
 
 
 
 
 
95
  }
96
- """
 
 
 
 
 
 
97
 
98
- with gr.Blocks(css=css) as demo:
99
- gr.Markdown("## ๐ŸŽค Speech to Text Converter")
100
-
101
- with gr.Group():
102
- with gr.Row():
103
- text_input = gr.Textbox(
104
- label="Your Input",
105
- placeholder="Click the mic button and speak...",
106
- elem_classes=["input-box"],
107
- scale=9
108
- )
109
- mic_button = gr.Button(
110
- "๐ŸŽค",
111
- elem_classes=["mic-button"],
112
- scale=1
113
- )
114
-
115
- status_display = gr.Textbox(
116
- label="Status",
117
- visible=False,
118
- interactive=False,
119
- elem_classes=["status-message"]
120
- )
121
-
122
- mic_button.click(
123
- fn=transcribe_audio,
124
- inputs=[gr.Slider(5, 30, value=10, label="Recording time limit (seconds)")],
125
- outputs=[status_display, text_input],
126
- show_progress="hidden"
127
- )
128
-
129
- gr.Examples(
130
- examples=["Hello world", "How are you today?", "Please convert my speech to text"],
131
- inputs=text_input,
132
- label="Try these examples:"
133
- )
134
 
135
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  if __name__ == "__main__":
138
- demo = create_ui()
139
- demo.launch(debug=True)
 
1
+ import streamlit as st
2
+ import time
3
+ import requests
4
+ from streamlit.components.v1 import html
5
+ import os
6
+ from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # Voice input dependencies
9
+ import torchaudio
10
+ import numpy as np
11
+ import torch
12
+ from io import BytesIO
13
+ import hashlib
14
+ from audio_recorder_streamlit import audio_recorder
15
+ from transformers import pipeline
16
+
17
+ ######################################
18
+ # Voice Input Helper Functions
19
+ ######################################
20
+
21
@st.cache_resource
def load_voice_model():
    """Build the Whisper speech-to-text pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object created by the
    first call is reused by later calls instead of being constructed again.

    Returns:
        The ``transformers`` automatic-speech-recognition pipeline for
        ``openai/whisper-base``.
    """
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
    )
    return asr_pipeline
24
+
25
def process_audio(audio_bytes):
    """Decode recorded audio into the mono 16 kHz dict passed to the ASR model.

    Args:
        audio_bytes: Encoded audio as ``bytes``; it is wrapped in a ``BytesIO``
            and decoded with ``torchaudio.load``.

    Returns:
        A dict with ``"raw"`` (the waveform as a squeezed NumPy array) and
        ``"sampling_rate"`` (always ``16000``).
    """
    target_rate = 16000
    signal, source_rate = torchaudio.load(BytesIO(audio_bytes))

    # More than one channel: average them down to a single channel.
    channel_count = signal.shape[0]
    if channel_count > 1:
        signal = torch.mean(signal, dim=0, keepdim=True)

    # Resample only when the recording is not already at the target rate.
    if source_rate != target_rate:
        to_target_rate = torchaudio.transforms.Resample(
            orig_freq=source_rate,
            new_freq=target_rate,
        )
        signal = to_target_rate(signal)

    samples = signal.numpy().squeeze()
    return {"raw": samples, "sampling_rate": target_rate}
33
+
34
def get_voice_transcription(state_key):
    """Render the voice recorder and return the transcript accumulated so far.

    Every new recording is transcribed with the cached Whisper pipeline and
    appended, separated by a single space, to ``st.session_state[state_key]``.
    After a successful transcription the script is rerun so the rest of the
    page sees the updated transcript.

    Args:
        state_key: Session-state key holding the accumulated transcript. The
            recorder widget is keyed ``state_key + "_audio"`` and the digest of
            the last processed recording is stored under
            ``state_key + "_last_hash"``.

    Returns:
        The accumulated transcript, or ``""`` when nothing has been
        transcribed yet. Transcription failures are reported with
        ``st.error`` and leave the transcript unchanged.
    """
    if state_key not in st.session_state:
        st.session_state[state_key] = ""

    audio_bytes = audio_recorder(
        key=state_key + "_audio",
        pause_threshold=0.8,
        text="🎙️ Speak your message",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
    )
    if not audio_bytes:
        return st.session_state[state_key]

    # Remember the digest of the last processed clip so identical bytes seen
    # again on a later script run are not transcribed a second time.
    current_hash = hashlib.md5(audio_bytes).hexdigest()
    last_hash_key = state_key + "_last_hash"
    if st.session_state.get(last_hash_key, "") == current_hash:
        return st.session_state[state_key]
    st.session_state[last_hash_key] = current_hash

    try:
        audio_input = process_audio(audio_bytes)
        whisper = load_voice_model()
        transcribed_text = whisper(audio_input)["text"].strip()
    except Exception as e:
        # Best effort: a failed decode/transcription must not break the page.
        st.error(f"Voice input error: {str(e)}")
        return st.session_state[state_key]

    st.info(f"📝 Transcribed: {transcribed_text}")
    # Join with a space and strip the *result*; stripping the piece being
    # appended (as before) removed the separator and glued words together.
    previous_text = st.session_state[state_key]
    st.session_state[state_key] = f"{previous_text} {transcribed_text}".strip()

    # Kept outside the try block so the rerun signal can never be mistaken for
    # a transcription error. `st.rerun` matches the call used in `main`.
    st.rerun()
    return st.session_state[state_key]
59
+
60
+ ######################################
61
+ # Game Functions & Styling
62
+ ######################################
63
+
64
@st.cache_resource
def get_help_agent():
    """Build the BlenderBot conversational pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object created by the
    first call is reused by later calls.

    Returns:
        The ``transformers`` "conversational" pipeline for
        ``facebook/blenderbot-400M-distill``.
    """
    help_agent = pipeline(
        "conversational",
        model="facebook/blenderbot-400M-distill",
    )
    return help_agent
67
+
68
def inject_custom_css():
    """Emit the app's ``<style>`` block into the page via ``st.markdown``.

    The stylesheet covers the title, subtitle, question box, input box, text
    inputs, buttons and the final-reveal banner.
    """
    stylesheet = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
* { font-family: 'Inter', sans-serif; }
.title { font-size: 2.8rem !important; font-weight: 800 !important;
background: linear-gradient(45deg, #6C63FF, #3B82F6);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
text-align: center; margin: 1rem 0; }
.subtitle { font-size: 1.1rem; text-align: center; color: #64748B; margin-bottom: 2.5rem; }
.question-box { background: white; border-radius: 20px; padding: 2rem; margin: 1.5rem 0;
box-shadow: 0 10px 25px rgba(0,0,0,0.08); border: 1px solid #e2e8f0; color: black; }
.input-box { background: white; border-radius: 12px; padding: 1.5rem; margin: 1rem 0;
box-shadow: 0 4px 6px rgba(0,0,0,0.05); }
.stTextInput input { border: 2px solid #e2e8f0 !important; border-radius: 10px !important;
padding: 12px 16px !important; }
button { background: linear-gradient(45deg, #6C63FF, #3B82F6) !important;
color: white !important; border-radius: 10px !important;
padding: 12px 24px !important; font-weight: 600; }
.final-reveal { font-size: 2.8rem;
background: linear-gradient(45deg, #6C63FF, #3B82F6);
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
text-align: center; margin: 2rem 0; font-weight: 800; }
</style>
"""
    # Raw HTML must be explicitly allowed, otherwise the tag is not applied.
    st.markdown(stylesheet, unsafe_allow_html=True)
93
+
94
def show_confetti():
    """Render an HTML component that fires a five-burst confetti animation.

    The component loads the canvas-confetti browser bundle from jsDelivr and
    calls ``confetti`` five times, splitting 200 particles across the bursts.
    """
    # The script URL had been corrupted to "[email protected]" (an e-mail
    # obfuscation artifact), which made the library fail to load.
    # NOTE(review): the pinned version was reconstructed, not recovered from
    # the original text -- confirm 1.5.1 is the release you want.
    html("""
<canvas id="confetti-canvas" class="confetti"></canvas>
<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.5.1/dist/confetti.browser.min.js"></script>
<script>
const count = 200;
const defaults = { origin: { y: 0.7 }, zIndex: 1050 };
function fire(particleRatio, opts) {
confetti(Object.assign({}, defaults, opts, {
particleCount: Math.floor(count * particleRatio)
}));
}
fire(0.25, { spread: 26, startVelocity: 55 });
fire(0.2, { spread: 60 });
fire(0.35, { spread: 100, decay: 0.91, scalar: 0.8 });
fire(0.1, { spread: 120, startVelocity: 25, decay: 0.92, scalar: 1.2 });
fire(0.1, { spread: 120, startVelocity: 45 });
</script>
""")
113
+
114
def ask_llama(conversation_history, category, is_final_guess=False):
    """Ask the Groq chat-completions endpoint for the next question or a guess.

    Args:
        conversation_history: List of chat messages (dicts with ``"role"`` and
            ``"content"``) exchanged so far. It is copied into the request and
            not modified.
        category: Kind of secret being guessed; interpolated into the prompts.
        is_final_guess: When true, the model is asked for its final guess only
            instead of another yes/no question.

    Returns:
        The text of the first completion choice, or the placeholder ``"..."``
        when the API key is missing or the request/response handling fails
        (the failure is also shown with ``st.error``).
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        # Fail fast instead of sending the literal header "Bearer None".
        st.error("❌ LLaMA API error: GROQ_API_KEY is not set")
        return "..."

    api_url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    system_prompt = f"""You're playing 20 questions to guess a {category}. Rules:
1. Ask strategic, non-repeating yes/no questions to narrow down.
2. Use all previous answers smartly.
3. If you're 80%+ sure, say: Final Guess: [your guess]
4. For places: ask about continent, country, landmarks, etc.
5. For people: ask if real, profession, gender, etc.
6. For objects: ask about use, size, material, etc."""

    if is_final_guess:
        prompt = f"""Based on these answers about a {category}, provide ONLY your final guess with no extra text:
{conversation_history}"""
    else:
        prompt = "Ask your next smart yes/no question."

    # Build a fresh list so the caller's history is never mutated.
    messages = [
        {"role": "system", "content": system_prompt},
        *conversation_history,
        {"role": "user", "content": prompt},
    ]

    data = {
        # NOTE(review): confirm this identifier against Groq's current model
        # list; an unknown model id makes every request fail.
        "model": "llama-3-70b-8192",
        "messages": messages,
        "temperature": 0.8,
        "max_tokens": 100,
    }

    try:
        # Without a timeout a stalled connection would hang the app forever.
        res = requests.post(api_url, headers=headers, json=data, timeout=30)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP errors, invalid JSON, or an unexpected response shape.
        st.error(f"❌ LLaMA API error: {e}")
        return "..."
149
+
150
+ ######################################
151
+ # Main App Logic Here (UI, Game Loop)
152
+ ######################################
153
+
154
def main():
    """Run the 20-questions page: set-up, question display, answers, final guess.

    On each script run this loads environment variables, injects the CSS,
    makes sure a conversation exists (asking the model for the first question
    when it is empty), shows the latest model message, collects the player's
    answer by voice or text, and handles the restart / submit / final-guess
    buttons.
    """
    # Imported locally because the module-level name `html` is bound to the
    # Streamlit component helper, not the standard-library module.
    from html import escape

    voice_key = "voice_input"

    load_dotenv()
    inject_custom_css()

    st.title("🎮 Guess It! - 20 Questions Game")
    st.markdown("<div class='subtitle'>Think of a person, place, or object. LLaMA will try to guess it!</div>", unsafe_allow_html=True)

    category = st.selectbox("Category of your secret:", ["Person", "Place", "Object"])

    if "conversation" not in st.session_state:
        st.session_state.conversation = []
        st.session_state.last_bot_msg = ""

    if st.button("🔄 Restart Game"):
        st.session_state.conversation = []
        st.session_state.last_bot_msg = ""
        # Drop any spoken answer left over from the previous game as well.
        st.session_state[voice_key] = ""
        st.rerun()

    if not st.session_state.conversation:
        st.session_state.last_bot_msg = ask_llama([], category)
        st.session_state.conversation.append({"role": "assistant", "content": st.session_state.last_bot_msg})

    # Model output is untrusted text: escape it before placing it in markup
    # that is rendered with unsafe_allow_html.
    question_markup = escape(st.session_state.last_bot_msg)
    st.markdown(f"<div class='question-box'><strong>LLaMA:</strong> {question_markup}</div>", unsafe_allow_html=True)

    # Always render both inputs. The previous `voice or text_input(...)` form
    # short-circuited, so the text box vanished once a transcript existed.
    voice_text = get_voice_transcription(voice_key)
    typed_text = st.text_input("💬 Your answer (yes/no/sometimes):")
    user_input = voice_text or typed_text

    if st.button("Submit Answer") and user_input:
        st.session_state.conversation.append({"role": "user", "content": user_input})
        with st.spinner("Thinking..."):
            response = ask_llama(st.session_state.conversation, category)
        st.session_state.last_bot_msg = response
        st.session_state.conversation.append({"role": "assistant", "content": response})
        # Clear the consumed transcript; otherwise the same spoken answer
        # would be resubmitted for every following question.
        st.session_state[voice_key] = ""
        st.rerun()

    if st.button("🤔 Make Final Guess"):
        with st.spinner("Making final guess..."):
            final_guess = ask_llama(st.session_state.conversation, category, is_final_guess=True)
        st.markdown(f"<div class='final-reveal'>🤯 Final Guess: <strong>{escape(final_guess)}</strong></div>", unsafe_allow_html=True)
        show_confetti()


if __name__ == "__main__":
    main()