Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
|
|
20 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
21 |
session_threads = {}
|
22 |
|
23 |
-
# ------------------
|
24 |
def reset_session():
|
25 |
session_id = str(uuid.uuid4())
|
26 |
session_threads[session_id] = client.beta.threads.create().id
|
@@ -54,16 +54,7 @@ def handle_chat(message, history, session_id):
|
|
54 |
image = extract_image_url(response)
|
55 |
return history, image
|
56 |
|
57 |
-
# ------------------
|
58 |
-
def ask_from_transcript(transcript, history, session_id):
|
59 |
-
return handle_chat(transcript, history, session_id)
|
60 |
-
|
61 |
-
def clear_all(client_id):
|
62 |
-
if client_id in connections:
|
63 |
-
connections[client_id].transcript = ""
|
64 |
-
return "", []
|
65 |
-
|
66 |
-
# ------------------ Real-Time Voice WebSocket ------------------
|
67 |
def create_websocket_client():
|
68 |
client_id = str(uuid.uuid4())
|
69 |
connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
|
@@ -83,40 +74,32 @@ def send_audio_chunk(audio, client_id):
|
|
83 |
return connections[client_id].transcript
|
84 |
|
85 |
# ------------------ UI ------------------
|
86 |
-
with gr.Blocks(theme=gr.themes.Soft()
|
87 |
-
@media (max-width: 768px) {
|
88 |
-
.gr-col { width: 100% !important; }
|
89 |
-
.gr-row > div { flex-direction: column !important; }
|
90 |
-
}
|
91 |
-
""") as demo:
|
92 |
gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
|
93 |
|
94 |
session_id = gr.State(value=reset_session())
|
95 |
client_id = gr.State()
|
96 |
|
97 |
with gr.Row():
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
chatbot = gr.Chatbot(label="💬 Document Assistant", height=480)
|
102 |
text_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What is clause 3.2?")
|
103 |
send_btn = gr.Button("Send")
|
104 |
|
105 |
send_btn.click(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
|
106 |
text_input.submit(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
|
107 |
|
|
|
108 |
with gr.Accordion("🎤 Or Use Voice Instead", open=False):
|
109 |
with gr.Row():
|
110 |
-
transcript_box = gr.Textbox(label="Live Transcript", lines=
|
111 |
-
with gr.Row():
|
112 |
-
mic_input = gr.Audio(label="🎙️ Record", streaming=True)
|
113 |
with gr.Row():
|
114 |
-
|
115 |
-
|
116 |
|
117 |
mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
|
118 |
-
|
119 |
-
clear_btn.click(fn=clear_all, inputs=[client_id], outputs=[transcript_box, chatbot])
|
120 |
demo.load(fn=create_websocket_client, outputs=client_id)
|
121 |
|
122 |
-
demo.launch()
|
|
|
20 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
21 |
session_threads = {}
|
22 |
|
23 |
+
# ------------------ Chat Logic ------------------
|
24 |
def reset_session():
|
25 |
session_id = str(uuid.uuid4())
|
26 |
session_threads[session_id] = client.beta.threads.create().id
|
|
|
54 |
image = extract_image_url(response)
|
55 |
return history, image
|
56 |
|
57 |
+
# ------------------ Voice Logic ------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def create_websocket_client():
|
59 |
client_id = str(uuid.uuid4())
|
60 |
connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
|
|
|
74 |
return connections[client_id].transcript
|
75 |
|
76 |
# ------------------ UI ------------------
|
77 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
|
|
|
|
|
|
|
78 |
gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
|
79 |
|
80 |
session_id = gr.State(value=reset_session())
|
81 |
client_id = gr.State()
|
82 |
|
83 |
with gr.Row():
|
84 |
+
image_display = gr.Image(label="π Extracted Document Image", show_label=True, height=360)
|
85 |
+
with gr.Column():
|
86 |
+
chatbot = gr.Chatbot(label="💬 Document Assistant", height=360)
|
|
|
87 |
text_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What is clause 3.2?")
|
88 |
send_btn = gr.Button("Send")
|
89 |
|
90 |
send_btn.click(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
|
91 |
text_input.submit(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
|
92 |
|
93 |
+
# Toggle Section
|
94 |
with gr.Accordion("🎤 Or Use Voice Instead", open=False):
|
95 |
with gr.Row():
|
96 |
+
transcript_box = gr.Textbox(label="Live Transcript", lines=7, interactive=False, autoscroll=True)
|
|
|
|
|
97 |
with gr.Row():
|
98 |
+
mic_input = gr.Audio(streaming=True)
|
99 |
+
clear_button = gr.Button("Clear Transcript")
|
100 |
|
101 |
mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
|
102 |
+
clear_button.click(fn=clear_transcript, inputs=[client_id], outputs=transcript_box)
|
|
|
103 |
demo.load(fn=create_websocket_client, outputs=client_id)
|
104 |
|
105 |
+
demo.launch()
|