IAMTFRMZA committed
Commit c0ce26b · verified · 1 Parent(s): 4a0a44f

Update app.py

Files changed (1)
  1. app.py +12 -29
app.py CHANGED

@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
 client = OpenAI(api_key=OPENAI_API_KEY)
 session_threads = {}
 
-# ------------------ Session & Chat Logic ------------------
+# ------------------ Chat Logic ------------------
 def reset_session():
     session_id = str(uuid.uuid4())
     session_threads[session_id] = client.beta.threads.create().id
@@ -54,16 +54,7 @@ def handle_chat(message, history, session_id):
     image = extract_image_url(response)
     return history, image
 
-# ------------------ Transcript Actions ------------------
-def ask_from_transcript(transcript, history, session_id):
-    return handle_chat(transcript, history, session_id)
-
-def clear_all(client_id):
-    if client_id in connections:
-        connections[client_id].transcript = ""
-    return "", []
-
-# ------------------ Real-Time Voice WebSocket ------------------
+# ------------------ Voice Logic ------------------
 def create_websocket_client():
     client_id = str(uuid.uuid4())
     connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -83,40 +74,32 @@ def send_audio_chunk(audio, client_id):
     return connections[client_id].transcript
 
 # ------------------ UI ------------------
-with gr.Blocks(theme=gr.themes.Soft(), css="""
-@media (max-width: 768px) {
-    .gr-col { width: 100% !important; }
-    .gr-row > div { flex-direction: column !important; }
-}
-""") as demo:
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
 
     session_id = gr.State(value=reset_session())
     client_id = gr.State()
 
     with gr.Row():
-        with gr.Column(scale=1):
-            image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=480)
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="💬 Document Assistant", height=480)
+        image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=360)
+        with gr.Column():
+            chatbot = gr.Chatbot(label="💬 Document Assistant", height=360)
             text_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What is clause 3.2?")
             send_btn = gr.Button("Send")
 
     send_btn.click(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
     text_input.submit(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
 
+    # Toggle Section
     with gr.Accordion("🎤 Or Use Voice Instead", open=False):
         with gr.Row():
-            transcript_box = gr.Textbox(label="Live Transcript", lines=6, interactive=False, autoscroll=True)
-        with gr.Row():
-            mic_input = gr.Audio(label="🎙️ Record", streaming=True)
+            transcript_box = gr.Textbox(label="Live Transcript", lines=7, interactive=False, autoscroll=True)
         with gr.Row():
-            ask_btn = gr.Button("🤖 Ask from Transcript", size="lg")
-            clear_btn = gr.Button("🧹 Clear Chat", size="lg")
+            mic_input = gr.Audio(streaming=True)
+            clear_button = gr.Button("Clear Transcript")
 
     mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
-    ask_btn.click(fn=ask_from_transcript, inputs=[transcript_box, chatbot, session_id], outputs=[chatbot, image_display])
-    clear_btn.click(fn=clear_all, inputs=[client_id], outputs=[transcript_box, chatbot])
+    clear_button.click(fn=clear_transcript, inputs=[client_id], outputs=transcript_box)
     demo.load(fn=create_websocket_client, outputs=client_id)
 
-    demo.launch()
+demo.launch()
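
Note: the new wiring binds clear_button to a clear_transcript handler whose definition is not visible in either hunk, so it presumably lives in an unchanged part of app.py. Judging from the removed clear_all(), a minimal sketch could look like this (hypothetical; the connections dict and the .transcript attribute are taken from the surrounding code):

    # Hypothetical sketch of clear_transcript, inferred from the removed clear_all().
    # It returns a single value because its only Gradio output is transcript_box.
    def clear_transcript(client_id):
        if client_id in connections:
            # Reset the transcript buffered on the live WebSocket connection.
            connections[client_id].transcript = ""
        return ""

One behavioral difference worth noting: the old 🧹 Clear Chat button also emptied the chatbot history (its outputs were [transcript_box, chatbot]), while the new Clear Transcript button resets only the transcript box.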
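
Both versions also depend on a WebSocketClient class defined outside this diff. From the calls that are visible, app.py assumes roughly this shape (a sketch of the implied interface, not the project's actual implementation):

    # Implied interface only: constructed with a URI, headers, and a client id,
    # and exposing a growing .transcript string that send_audio_chunk() returns.
    class WebSocketClient:
        def __init__(self, uri, headers, client_id):
            self.uri = uri            # realtime transcription endpoint
            self.headers = headers    # auth headers for the socket
            self.client_id = client_id
            self.transcript = ""      # appended to as results stream in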