IAMTFRMZA committed on
Commit
91f00be
·
verified ·
1 Parent(s): 0ee04bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -151
app.py CHANGED
@@ -1,161 +1,40 @@
1
- import streamlit as st
2
- import os
3
- import time
4
- import re
5
- import json
6
- import requests
7
- from PIL import Image
8
- from openai import OpenAI
9
- from io import BytesIO
10
-
11
- # ------------------ App Configuration ------------------
12
- st.set_page_config(page_title="Document AI Assistant", layout="wide")
13
- st.title("📄 Document AI Assistant")
14
- st.caption("Chat with an AI Assistant on your medical/pathology documents")
15
-
16
- # ------------------ Load API Key and Assistant ID ------------------
17
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
18
- ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
19
-
20
- if not OPENAI_API_KEY or not ASSISTANT_ID:
21
- st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
22
- st.stop()
23
-
24
- client = OpenAI(api_key=OPENAI_API_KEY)
25
-
26
- # ------------------ Load Structured JSON ------------------
27
- STRUCTURED_JSON_PATH = "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"
28
- try:
29
- with open(STRUCTURED_JSON_PATH, "r") as f:
30
- structured_data = json.load(f)
31
- except Exception as e:
32
- st.error(f"❌ Failed to load structured summary file: {e}")
33
- st.stop()
34
-
35
- # ------------------ Session State Initialization ------------------
36
- if "messages" not in st.session_state:
37
- st.session_state.messages = []
38
- if "thread_id" not in st.session_state:
39
- st.session_state.thread_id = None
40
- if "image_url" not in st.session_state:
41
- st.session_state.image_url = None
42
- if "image_updated" not in st.session_state:
43
- st.session_state.image_updated = False
44
-
45
- # ------------------ Sidebar Controls ------------------
46
- st.sidebar.header("🔧 Settings")
47
- if st.sidebar.button("🔄 Clear Chat"):
48
- st.session_state.messages = []
49
- st.session_state.thread_id = None
50
- st.session_state.image_url = None
51
- st.session_state.image_updated = False
52
- st.rerun()
53
-
54
- show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
55
-
56
- # ------------------ Layout ------------------
57
- left, center, right = st.columns([1, 2, 1])
58
-
59
- # ------------------ Left Column: Document Image ------------------
60
- with left:
61
- st.subheader("📄 Document Image")
62
- if show_image and st.session_state.image_url:
63
- try:
64
- image = Image.open(requests.get(st.session_state.image_url, stream=True).raw)
65
- st.image(image, caption="📑 Extracted Page", use_container_width=True)
66
- st.session_state.image_updated = False
67
- except Exception as e:
68
- st.warning("⚠️ Could not load image.")
69
-
70
- # ------------------ Center Column: Chat UI ------------------
71
- with center:
72
- st.subheader("💬 Document AI Assistant")
73
- for message in st.session_state.messages:
74
- role, content = message["role"], message["content"]
75
- st.chat_message(role).write(content)
76
-
77
- if prompt := st.chat_input("Type your question about the document..."):
78
- st.session_state.messages.append({"role": "user", "content": prompt})
79
- st.chat_message("user").write(prompt)
80
-
81
- try:
82
- if st.session_state.thread_id is None:
83
- thread = client.beta.threads.create()
84
- st.session_state.thread_id = thread.id
85
-
86
- thread_id = st.session_state.thread_id
87
-
88
- client.beta.threads.messages.create(
89
- thread_id=thread_id,
90
- role="user",
91
- content=prompt
92
- )
93
-
94
- run = client.beta.threads.runs.create(
95
- thread_id=thread_id,
96
- assistant_id=ASSISTANT_ID
97
- )
98
-
99
- with st.spinner("Assistant is thinking..."):
100
- while True:
101
- run_status = client.beta.threads.runs.retrieve(
102
- thread_id=thread_id,
103
- run_id=run.id
104
- )
105
- if run_status.status == "completed":
106
- break
107
- time.sleep(1)
108
-
109
- messages = client.beta.threads.messages.list(thread_id=thread_id)
110
- assistant_message = None
111
- for message in reversed(messages.data):
112
- if message.role == "assistant":
113
- assistant_message = message.content[0].text.value
114
- break
115
-
116
- st.chat_message("assistant").write(assistant_message)
117
- st.session_state.messages.append({"role": "assistant", "content": assistant_message})
118
-
119
- # Extract GitHub image URL
120
- image_match = re.search(
121
- r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
122
- assistant_message
123
- )
124
- if image_match:
125
- st.session_state.image_url = image_match.group(0)
126
- st.session_state.image_updated = True
127
- st.rerun()
128
-
129
- except Exception as e:
130
- st.error(f"❌ Error: {str(e)}")
131
-
132
- # ------------------ Right Column: Structured Summary + FAQ ------------------
133
  with right:
134
  st.subheader("📌 Summary & FAQ (from Structured Data)")
135
 
136
- summary_text = "No image selected or page not found."
 
 
 
 
 
137
  faq_list = []
138
 
139
  if st.session_state.image_url:
140
  match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
141
  if match:
142
- page_number = int(match.group(1)) # ✅ Must be int to match JSON
143
  page_entry = next((entry for entry in structured_data if entry.get("page_number") == page_number), None)
144
- if page_entry:
145
- summary_text = page_entry.get("summary", "No summary available.")
146
- faq_list = page_entry.get("faqs", []) or page_entry.get("questions", [])
147
-
148
- # Summary Output
149
- st.subheader("📝 Summary")
150
- st.markdown(summary_text)
151
 
152
- # FAQs Output
153
- st.subheader("❓ Auto-Generated FAQ")
154
- if faq_list:
155
- for faq in faq_list:
156
- if isinstance(faq, dict): # for {"question": "...", "answer": "..."}
157
- st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
158
- else: # fallback if it's just a list of questions
159
- st.markdown(f"**Q:** {faq}")
160
- else:
161
- st.info("No FAQs available for this page.")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------ Right Column: Structured Summary + FAQ (with Buttons) ------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  with right:
3
  st.subheader("📌 Summary & FAQ (from Structured Data)")
4
 
5
+ # Controls
6
+ col1, col2 = st.columns(2)
7
+ show_summary = col1.button("📝 Load Summary")
8
+ show_faq = col2.button("❓ Load FAQ")
9
+
10
+ summary_text = "Click the button to load summary."
11
  faq_list = []
12
 
13
  if st.session_state.image_url:
14
  match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
15
  if match:
16
+ page_number = int(match.group(1))
17
  page_entry = next((entry for entry in structured_data if entry.get("page_number") == page_number), None)
 
 
 
 
 
 
 
18
 
19
+ if page_entry:
20
+ if show_summary:
21
+ summary_text = page_entry.get("summary", "No summary available.")
22
+ if show_faq:
23
+ faq_list = page_entry.get("faqs", []) or page_entry.get("questions", [])
24
+
25
+ # Display Summary
26
+ if show_summary:
27
+ st.subheader("📝 Summary")
28
+ st.markdown(summary_text)
29
+
30
+ # Display FAQs
31
+ if show_faq:
32
+ st.subheader("❓ Auto-Generated FAQ")
33
+ if faq_list:
34
+ for faq in faq_list:
35
+ if isinstance(faq, dict):
36
+ st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
37
+ else:
38
+ st.markdown(f"**Q:** {faq}")
39
+ else:
40
+ st.info("No FAQs available for this page.")