IAMTFRMZA committed on
Commit e10a51a · verified · 1 Parent(s): eb04d10

Update app.py

Files changed (1)
  1. app.py +38 -70
app.py CHANGED
@@ -5,28 +5,25 @@ import uuid
import threading
import time
import re
-
-from openai import OpenAI
from dotenv import load_dotenv
+from openai import OpenAI
from realtime_transcriber import WebSocketClient, connections, WEBSOCKET_URI, WEBSOCKET_HEADERS

-# ------------------ Load API Key ------------------
+# ------------------ Load Secrets ------------------
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

if not OPENAI_API_KEY or not ASSISTANT_ID:
-    raise ValueError("Missing OPENAI_API_KEY or ASSISTANT_ID in environment variables")
+    raise ValueError("Missing OPENAI_API_KEY or ASSISTANT_ID")

client = OpenAI(api_key=OPENAI_API_KEY)
-
-# ------------------ Chat Threading ------------------
session_threads = {}

+# ------------------ Chat Logic ------------------
def reset_session():
    session_id = str(uuid.uuid4())
-    thread = client.beta.threads.create()
-    session_threads[session_id] = thread.id
+    session_threads[session_id] = client.beta.threads.create().id
    return session_id

def process_chat(message, history, session_id):
@@ -35,50 +32,29 @@ def process_chat(message, history, session_id):
    thread_id = client.beta.threads.create().id
    session_threads[session_id] = thread_id

-    client.beta.threads.messages.create(
-        thread_id=thread_id,
-        role="user",
-        content=message
-    )
-
-    run = client.beta.threads.runs.create(
-        thread_id=thread_id,
-        assistant_id=ASSISTANT_ID
-    )
-
-    while True:
-        run_status = client.beta.threads.runs.retrieve(
-            thread_id=thread_id,
-            run_id=run.id
-        )
-        if run_status.status == "completed":
-            break
+    client.beta.threads.messages.create(thread_id=thread_id, role="user", content=message)
+    run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
+
+    while client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id).status != "completed":
        time.sleep(1)

    messages = client.beta.threads.messages.list(thread_id=thread_id)
-    assistant_response = "⚠️ Assistant did not respond."
    for msg in reversed(messages.data):
        if msg.role == "assistant":
-            assistant_response = msg.content[0].text.value
-            break
-
-    return assistant_response
+            return msg.content[0].text.value
+    return "⚠️ Assistant did not respond."

def extract_image_url(text):
-    match = re.search(
-        r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
-        text
-    )
+    match = re.search(r'https://raw\.githubusercontent\.com/[^\s"]+\.png', text)
    return match.group(0) if match else None

-def chat_handler(message, history, session_id):
-    history.append(("user", message))
+def handle_chat(message, history, session_id):
    response = process_chat(message, history, session_id)
-    history.append(("assistant", response))
-    image_url = extract_image_url(response)
-    return history, image_url
+    history.append((message, response))
+    image = extract_image_url(response)
+    return history, image

-# ------------------ Transcription ------------------
+# ------------------ Voice Logic ------------------
def create_websocket_client():
    client_id = str(uuid.uuid4())
    connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -97,7 +73,7 @@ def send_audio_chunk(audio, client_id):
    connections[client_id].enqueue_audio_chunk(sr, y)
    return connections[client_id].transcript

-# ------------------ Gradio App ------------------
+# ------------------ UI ------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")

@@ -105,33 +81,25 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
    client_id = gr.State()

    with gr.Row():
-        with gr.Column(scale=1):
-            image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=400)
-
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="💬 Document Assistant", height=400)
-            message_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What does clause 3.2 mean?")
-            send_button = gr.Button("Send")
-
-    # Send message logic
-    def user_send(msg, history, session_id):
-        return chat_handler(msg, history, session_id)
-
-    send_button.click(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
-    message_input.submit(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
-
-    # ------------------ Voice Section ------------------
-    gr.Markdown("## 🎙️ Realtime Voice Transcription")
-
-    with gr.Row():
-        transcript_box = gr.Textbox(label="Live Transcript", lines=7, interactive=False, autoscroll=True)
-
-    with gr.Row():
-        mic_input = gr.Audio(streaming=True)
-        clear_button = gr.Button("Clear Transcript")
-
-    mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
-    clear_button.click(fn=clear_transcript, inputs=[client_id], outputs=transcript_box)
-    demo.load(fn=create_websocket_client, outputs=client_id)
+        image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=360)
+        with gr.Column():
+            chatbot = gr.Chatbot(label="💬 Document Assistant", height=360)
+            text_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What is clause 3.2?")
+            send_btn = gr.Button("Send")
+
+    send_btn.click(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
+    text_input.submit(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
+
+    # Toggle Section
+    with gr.Accordion("🎤 Or Use Voice Instead", open=False):
+        with gr.Row():
+            transcript_box = gr.Textbox(label="Live Transcript", lines=7, interactive=False, autoscroll=True)
+        with gr.Row():
+            mic_input = gr.Audio(streaming=True)
+            clear_button = gr.Button("Clear Transcript")
+
+        mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
+        clear_button.click(fn=clear_transcript, inputs=[client_id], outputs=transcript_box)
+    demo.load(fn=create_websocket_client, outputs=client_id)

demo.launch()
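Note on `process_chat`: the verbose create/retrieve/break loop collapses into a single `while` condition that re-fetches the run once per second. A standalone sketch of the same polling pattern, assuming the same Assistants beta client as app.py; the terminal-failure check is an extra safeguard in this sketch, not something the commit adds:

```python
import time

def run_and_wait(client, thread_id, assistant_id, poll_seconds=1.0):
    """Create a run on an existing thread and poll until it finishes."""
    run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant_id)
    while True:
        status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id).status
        if status == "completed":
            return run
        if status in ("failed", "cancelled", "expired"):
            # Unlike the loop in app.py, give up instead of polling forever.
            raise RuntimeError(f"Run ended with status: {status}")
        time.sleep(poll_seconds)
```

The one-second interval matches the committed code; a long-running deployment would usually add an overall timeout as well.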
 
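The rewritten `extract_image_url` drops the hard-coded `AndrewLORTech/surgical-pathology-manual/main` path and accepts any `.png` hosted on raw.githubusercontent.com. A quick check of the new pattern; the sample URL is illustrative, not taken from the repo:

```python
import re

PNG_PATTERN = re.compile(r'https://raw\.githubusercontent\.com/[^\s"]+\.png')

text = 'See https://raw.githubusercontent.com/example-org/example-repo/main/page-01.png for the scan.'
match = PNG_PATTERN.search(text)
print(match.group(0) if match else None)
# https://raw.githubusercontent.com/example-org/example-repo/main/page-01.png
```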
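`handle_chat` (renamed from `chat_handler`) now appends one `(user_message, assistant_response)` tuple per turn, the pair format `gr.Chatbot` expects; the old version pushed separate `("user", ...)` and `("assistant", ...)` entries, which the component would render as mislabeled turns. A shape-only sketch with the model call stubbed out; names here are hypothetical:

```python
def fake_process_chat(message, history, session_id):
    # Stand-in for the OpenAI round trip in process_chat.
    return "Clause 3.2 sets out the payment terms."

history = []
message = "What is clause 3.2?"
response = fake_process_chat(message, history, "session-123")
history.append((message, response))  # one (user, assistant) tuple per turn

assert history == [("What is clause 3.2?", "Clause 3.2 sets out the payment terms.")]
```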
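The voice section also moves from an always-visible block under a `gr.Markdown` heading into a `gr.Accordion` that starts collapsed, so the transcription controls stay hidden until the user opts in. The wiring, reduced to its essentials with the audio handler stubbed; a sketch, not the committed code:

```python
import gradio as gr

def append_chunk_marker(audio, transcript):
    # Stand-in for send_audio_chunk: mark that one streamed chunk arrived.
    return (transcript or "") + "."

with gr.Blocks() as demo:
    with gr.Accordion("🎤 Or Use Voice Instead", open=False):  # collapsed by default
        transcript_box = gr.Textbox(label="Live Transcript", interactive=False)
        mic_input = gr.Audio(streaming=True)
        mic_input.stream(fn=append_chunk_marker, inputs=[mic_input, transcript_box], outputs=transcript_box)

demo.launch()
```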