iisadia committed
Commit b8feb9b · verified · 1 Parent(s): 5d2cee9

Update app.py

Files changed (1)
  1. app.py +164 -42
app.py CHANGED
@@ -13,9 +13,7 @@ from io import BytesIO
 import hashlib
 from audio_recorder_streamlit import audio_recorder
 from transformers import pipeline
-
-
-
+from datetime import datetime
 
 ######################################
 # Voice Input Helper Functions
@@ -35,33 +33,76 @@ def process_audio(audio_bytes):
     waveform = resampler(waveform)
     return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
 
-def get_voice_transcription(state_key):
+def get_voice_transcription(state_key, input_container):
     """Display audio recorder for a given key.
     If new audio is recorded, transcribe it and update the session state.
     """
     if state_key not in st.session_state:
         st.session_state[state_key] = ""
-    # Use a unique key for the recorder widget
-    audio_bytes = audio_recorder(key=state_key + "_audio",
-                                 pause_threshold=0.8,
-                                 text="Speak to type",
-                                 recording_color="#e8b62c",
-                                 neutral_color="#6aa36f")
+
+    # Create a unique key for the recorder widget
+    recorder_key = f"{state_key}_audio_{hash(input_container) if input_container else ''}"
+
+    # Use columns to place mic button inside input field
+    col1, col2 = input_container.columns([0.85, 0.15])
+
+    # Audio recorder with custom styling
+    audio_bytes = audio_recorder(key=recorder_key,
+                                 pause_threshold=1.5,  # Shorter pause for quick responses
+                                 text="",
+                                 recording_color="#e8b62c",
+                                 neutral_color="#6aa36f",
+                                 icon_name="microphone",
+                                 icon_size="1.5em")
+
     if audio_bytes:
         current_hash = hashlib.md5(audio_bytes).hexdigest()
         last_hash_key = state_key + "_last_hash"
+
         if st.session_state.get(last_hash_key, "") != current_hash:
             st.session_state[last_hash_key] = current_hash
+
+            # Show processing indicator
+            processing_placeholder = input_container.empty()
+            start_time = datetime.now()
+            processing_placeholder.markdown(
+                f"<div style='color: #6C63FF; font-size: 0.8em; margin-top: -10px;'>"
+                f"<i class='fas fa-spinner fa-spin'></i> Processing speech..."
+                f"</div>",
+                unsafe_allow_html=True
+            )
+
             try:
                 audio_input = process_audio(audio_bytes)
                 whisper = load_voice_model()
+
+                # Measure processing time
+                processing_start = time.time()
                 transcribed_text = whisper(audio_input)["text"]
+                processing_time = time.time() - processing_start
+
+                # For short responses (yes/no/both), use a simpler model if available
+                if len(transcribed_text.split()) <= 2:
+                    transcribed_text = transcribed_text.lower().strip()
+
                 st.info(f"📝 Transcribed: {transcribed_text}")
+
+                # Show processing time feedback
+                processing_placeholder.markdown(
+                    f"<div style='color: #6C63FF; font-size: 0.8em; margin-top: -10px;'>"
+                    f"<i class='fas fa-check-circle'></i> Processed in {processing_time:.1f}s"
+                    f"</div>",
+                    unsafe_allow_html=True
+                )
+
                 # Append (or set) new transcription
-                st.session_state[state_key] += (" " + transcribed_text).strip()
+                st.session_state[state_key] = transcribed_text
                 st.experimental_rerun()
+
             except Exception as e:
                 st.error(f"Voice input error: {str(e)}")
+                processing_placeholder.empty()
+
     return st.session_state[state_key]
 
 ######################################
@@ -122,6 +163,34 @@ def inject_custom_css():
         .progress-fill { height: 100%; background: linear-gradient(90deg, #6C63FF, #3B82F6);
                          transition: width 0.5s ease; }
         .question-count { color: #6C63FF; font-weight: 600; font-size: 0.9rem; margin-bottom: 0.5rem; }
+        .mic-button { position: absolute; right: 10px; top: 50%; transform: translateY(-50%);
+                      background: none; border: none; cursor: pointer; color: #6C63FF; }
+        .processing-indicator { color: #6C63FF; font-size: 0.8em; margin-top: -10px; }
+
+        /* Custom audio recorder styles */
+        .audio-recorder {
+            background: none !important;
+            box-shadow: none !important;
+            padding: 0 !important;
+            margin: 0 !important;
+            min-width: auto !important;
+            height: auto !important;
+        }
+        .audio-recorder:hover {
+            transform: scale(1.1) !important;
+        }
+        .audio-recorder svg {
+            color: #6C63FF !important;
+        }
+        .audio-recorder.recording svg {
+            color: #e74c3c !important;
+            animation: pulse 1.5s infinite;
+        }
+        @keyframes pulse {
+            0% { transform: scale(1); }
+            50% { transform: scale(1.2); }
+            100% { transform: scale(1); }
+        }
 
     </style>
     """, unsafe_allow_html=True)
@@ -266,22 +335,33 @@ def main():
 
         with st.form("start_form"):
             # --- Voice Input for Category ---
-            st.markdown("#### Use Voice (English/Urdu) for Category Input")
-            voice_category = get_voice_transcription("voice_category")
-            # The text input now defaults to any spoken words
-            category_input = st.text_input("Enter category (person/place/object):",
-                                           value=voice_category.strip(),
-                                           key="category_input").strip().lower()
+            st.markdown("#### Speak your category (person/place/object)")
+            category_container = st.empty()
+            category_input = category_container.text_input(
+                "Enter category (person/place/object):",
+                key="category_input"
+            )
+
+            # Get voice transcription and update the input field
+            voice_category = get_voice_transcription("voice_category", category_container)
+            if voice_category and voice_category != category_input:
+                category_container.text_input(
+                    "Enter category (person/place/object):",
+                    value=voice_category.strip(),
+                    key="category_input_updated"
+                )
+
             if st.form_submit_button("Start Game"):
-                if not category_input:
+                final_category = st.session_state.get("voice_category", "").strip() or category_input.strip().lower()
+                if not final_category:
                     st.error("Please enter a category!")
-                elif category_input not in ["person", "place", "object"]:
+                elif final_category not in ["person", "place", "object"]:
                     st.error("Please enter either 'person', 'place', or 'object'!")
                 else:
-                    st.session_state.category = category_input
+                    st.session_state.category = final_category
                     first_question = ask_llama([
                         {"role": "user", "content": "Ask your first strategic yes/no question."}
-                    ], category_input)
+                    ], final_category)
                     st.session_state.questions = [first_question]
                     st.session_state.conversation_history = [
                         {"role": "assistant", "content": first_question}
@@ -318,18 +398,30 @@ def main():
                 st.experimental_rerun()
         with st.form("answer_form"):
             # --- Voice Input for Answer ---
-            st.markdown("#### Use Voice (English/Urdu) for Your Answer")
-            voice_answer = get_voice_transcription("voice_answer")
-            answer_input = st.text_input("Your answer (yes/no/both):",
-                                         value=voice_answer.strip(),
-                                         key=f"answer_{st.session_state.current_q}").strip().lower()
+            st.markdown("#### Speak your answer (yes/no/both)")
+            answer_container = st.empty()
+            answer_input = answer_container.text_input(
+                "Your answer (yes/no/both):",
+                key=f"answer_{st.session_state.current_q}"
+            )
+
+            # Get voice transcription and update the input field
+            voice_answer = get_voice_transcription("voice_answer", answer_container)
+            if voice_answer and voice_answer != answer_input:
+                answer_container.text_input(
+                    "Your answer (yes/no/both):",
+                    value=voice_answer.strip(),
+                    key=f"answer_updated_{st.session_state.current_q}"
+                )
+
             if st.form_submit_button("Submit"):
-                if answer_input not in ["yes", "no", "both"]:
+                final_answer = st.session_state.get("voice_answer", "").strip().lower() or answer_input.strip().lower()
+                if final_answer not in ["yes", "no", "both"]:
                     st.error("Please answer with 'yes', 'no', or 'both'!")
                 else:
-                    st.session_state.answers.append(answer_input)
+                    st.session_state.answers.append(final_answer)
                     st.session_state.conversation_history.append(
-                        {"role": "user", "content": answer_input}
+                        {"role": "user", "content": final_answer}
                     )
                     next_response = ask_llama(
                         st.session_state.conversation_history,
@@ -347,19 +439,34 @@ def main():
             if st.session_state.current_q >= 20:
                 st.session_state.game_state = "result"
                 st.experimental_rerun()
+
         with st.expander("Need Help? Chat with AI Assistant"):
             # --- Voice Input for Help Query ---
-            st.markdown("#### Use Voice (English/Urdu) for Help Query")
-            voice_help = get_voice_transcription("voice_help")
-            help_query = st.text_input("Enter your help query:",
-                                       value=voice_help.strip(),
-                                       key="help_query")
+            st.markdown("#### Speak your help query")
+            help_container = st.empty()
+            help_query = help_container.text_input(
+                "Enter your help query:",
+                key="help_query"
+            )
+
+            # Get voice transcription and update the input field
+            voice_help = get_voice_transcription("voice_help", help_container)
+            if voice_help and voice_help != help_query:
+                help_container.text_input(
+                    "Enter your help query:",
+                    value=voice_help.strip(),
+                    key="help_query_updated"
+                )
+
             if st.button("Send", key="send_help"):
-                if help_query:
-                    help_response = ask_help_agent(help_query)
-                    st.session_state.help_conversation.append({"query": help_query, "response": help_response})
+                final_help_query = st.session_state.get("voice_help", "").strip() or help_query.strip()
+                if final_help_query:
+                    help_response = ask_help_agent(final_help_query)
+                    st.session_state.help_conversation.append({"query": final_help_query, "response": help_response})
+                    st.experimental_rerun()
                 else:
                     st.error("Please enter a query!")
+
             if st.session_state.help_conversation:
                 for msg in st.session_state.help_conversation:
                     st.markdown(f"**You:** {msg['query']}")
@@ -381,15 +488,30 @@ def main():
             </div>
         ''', unsafe_allow_html=True)
         with st.form("confirm_form"):
-            confirm_input = st.text_input("Type your answer (yes/no/both):", key="confirm_input").strip().lower()
+            # --- Voice Input for Confirmation ---
+            confirm_container = st.empty()
+            confirm_input = confirm_container.text_input(
+                "Type your answer (yes/no/both):",
+                key="confirm_input"
+            )
+
+            # Get voice transcription and update the input field
+            voice_confirm = get_voice_transcription("voice_confirm", confirm_container)
+            if voice_confirm and voice_confirm != confirm_input:
+                confirm_container.text_input(
+                    "Type your answer (yes/no/both):",
+                    value=voice_confirm.strip(),
+                    key="confirm_input_updated"
+                )
+
             if st.form_submit_button("Submit"):
-                if confirm_input not in ["yes", "no", "both"]:
+                final_confirm = st.session_state.get("voice_confirm", "").strip().lower() or confirm_input.strip().lower()
+                if final_confirm not in ["yes", "no", "both"]:
                     st.error("Please answer with 'yes', 'no', or 'both'!")
                 else:
-                    if confirm_input == "yes":
+                    if final_confirm == "yes":
                         st.session_state.game_state = "result"
                         st.experimental_rerun()
-                        st.stop()
                     else:
                         st.session_state.conversation_history.append(
                             {"role": "user", "content": "no"}
@@ -430,4 +552,4 @@ def main():
         st.experimental_rerun()
 
 if __name__ == "__main__":
-    main()
+    main()