IAMTFRMZA committed on
Commit
8c4492e
·
verified ·
1 Parent(s): e1d9b68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -34
app.py CHANGED
@@ -1,44 +1,170 @@
 
 
 
 
1
  import json
 
 
 
 
2
 
3
- # Load the structured pathology JSON file
4
- @st.cache_data
5
- def load_data():
6
- with open("51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output (1).json", "r") as f:
7
- return json.load(f)
8
 
9
- data = load_data()
 
 
10
 
11
- st.title("πŸ“˜ Surgical Pathology Manual - Page Summary & FAQ")
 
 
12
 
13
- # Get available pages
14
- page_numbers = sorted({int(entry["page"]) for entry in data if "page" in entry})
15
- selected_page = st.selectbox("Select Page Number", page_numbers)
16
 
17
- # Filter content for the selected page
18
- page_content = [entry for entry in data if int(entry.get("page", -1)) == selected_page]
 
 
 
 
 
 
19
 
20
- if page_content:
21
- for section in page_content:
22
- section_title = section.get("section_heading", "Untitled Section")
23
- summary = section.get("summary")
24
- faq = section.get("faq")
 
 
 
 
25
 
26
- st.markdown(f"### 🧠 Section: {section_title}")
 
 
 
 
 
 
 
27
 
28
- if summary:
29
- st.markdown("#### πŸ” Summary")
30
- st.write(summary)
31
- else:
32
- st.info("No summary available for this section.")
33
-
34
- if faq:
35
- st.markdown("#### ❓ FAQ")
36
- for qna in faq:
37
- question = qna.get("question", "")
38
- answer = qna.get("answer", "")
39
- st.markdown(f"**Q:** {question}")
40
- st.markdown(f"**A:** {answer}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  else:
42
- st.info("No FAQs available for this section.")
43
- else:
44
- st.warning("No content found for the selected page.")
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ import re
5
  import json
6
+ import requests
7
+ from PIL import Image
8
+ from openai import OpenAI
9
+ from io import BytesIO
10
 
# ------------------ App Configuration ------------------
# Configure the browser tab title and a wide layout, then render the page
# header and a one-line description of what the app does.
st.set_page_config(page_title="Document AI Assistant", layout="wide")
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology documents")
 
15
 
# ------------------ Load API Key and Assistant ID ------------------
# Both values are read from environment variables; either one being unset
# or empty halts the script after showing an error.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

if not (OPENAI_API_KEY and ASSISTANT_ID):
    st.error(
        "Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets."
    )
    st.stop()

# Shared OpenAI client used by the chat column below.
client = OpenAI(api_key=OPENAI_API_KEY)
 
 
25
 
# ------------------ Load Structured JSON ------------------
# Per-page summaries/FAQs are read once per script run from a JSON file that
# is opened relative to the current working directory.
STRUCTURED_JSON_PATH = "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"
try:
    # Explicit UTF-8 so decoding does not depend on the host's locale.
    with open(STRUCTURED_JSON_PATH, "r", encoding="utf-8") as f:
        structured_data = json.load(f)
except Exception as e:
    # Top-level boundary: report any load/parse failure in the UI and halt,
    # since the summary panel cannot work without this data.
    st.error(f"❌ Failed to load structured summary file: {e}")
    st.stop()
34
 
# ------------------ Session State Initialization ------------------
# Seed every session key the app relies on, leaving any value that is
# already present untouched.
for _state_key, _state_default in (
    ("messages", []),
    ("thread_id", None),
    ("image_url", None),
    ("image_updated", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
44
 
# ------------------ Sidebar Controls ------------------
st.sidebar.header("🔧 Settings")
if st.sidebar.button("🔄 Clear Chat"):
    # Drop the conversation, the assistant thread and the displayed page,
    # then rerun the script so the UI redraws from the cleared state.
    st.session_state.messages = []
    st.session_state.thread_id = None
    st.session_state.image_url = None
    st.session_state.image_updated = False
    st.rerun()

# Toggle read by the left column to decide whether to render the page image.
show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
55
+
# ------------------ Layout ------------------
# Three columns with the chat column twice as wide as the side columns.
left, center, right = st.columns([1, 2, 1])

# ------------------ Left Column: Document Image ------------------
with left:
    st.subheader("📄 Document Image")
    if show_image and st.session_state.image_url:
        try:
            # Bound the wait so an unresponsive host cannot hang the script
            # run, and fail on HTTP errors instead of feeding an error page
            # to the image decoder.
            response = requests.get(st.session_state.image_url, timeout=10)
            response.raise_for_status()
            # Decode from the fully downloaded body rather than the raw
            # socket stream, which is not content-decoded.
            image = Image.open(BytesIO(response.content))
            st.image(image, caption="📑 Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception:
            # Best effort: a missing or undecodable image must not take the
            # rest of the page down.
            st.warning("⚠️ Could not load image.")
69
+
# ------------------ Center Column: Chat UI ------------------
# Polling parameters for the assistant run.
_RUN_POLL_INTERVAL_SECONDS = 1
_RUN_TIMEOUT_SECONDS = 120
# Statuses from which the run will not reach "completed" in this app
# ("requires_action" is included because no tool outputs are ever submitted).
_RUN_DEAD_END_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)

with center:
    st.subheader("💬 Document AI Assistant")

    # Replay the stored conversation on every script run.
    for message in st.session_state.messages:
        st.chat_message(message["role"]).write(message["content"])

    if prompt := st.chat_input("Type your question about the document..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)

        try:
            # Create the assistant thread lazily on the first question and
            # reuse it for the rest of the session.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id

            thread_id = st.session_state.thread_id

            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )

            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID,
            )

            with st.spinner("Assistant is thinking..."):
                deadline = time.monotonic() + _RUN_TIMEOUT_SECONDS
                while True:
                    run_status = client.beta.threads.runs.retrieve(
                        thread_id=thread_id,
                        run_id=run.id,
                    )
                    if run_status.status == "completed":
                        break
                    # Stop polling when the run can no longer complete,
                    # instead of spinning forever.
                    if run_status.status in _RUN_DEAD_END_STATUSES:
                        raise RuntimeError(
                            f"Assistant run ended with status '{run_status.status}'."
                        )
                    if time.monotonic() >= deadline:
                        raise TimeoutError(
                            f"Assistant did not finish within {_RUN_TIMEOUT_SECONDS} seconds."
                        )
                    time.sleep(_RUN_POLL_INTERVAL_SECONDS)

            # Request newest-first and take the first assistant message, so
            # the reply shown is the one for this question rather than the
            # earliest reply in the thread.
            messages = client.beta.threads.messages.list(
                thread_id=thread_id,
                order="desc",
            )
            assistant_message = None
            for message in messages.data:
                if message.role != "assistant":
                    continue
                # A message may mix text with other content parts; keep only
                # the text parts.
                text_parts = [
                    part.text.value
                    for part in message.content
                    if getattr(part, "type", None) == "text"
                ]
                if text_parts:
                    assistant_message = "\n\n".join(text_parts)
                    break

            if assistant_message is None:
                raise RuntimeError("The assistant returned no text reply.")

            st.chat_message("assistant").write(assistant_message)
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            # Extract GitHub image URL
            image_match = re.search(
                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                assistant_message,
            )
            if image_match:
                st.session_state.image_url = image_match.group(0)
                st.session_state.image_updated = True
                # Rerun so the side columns pick up the new page.
                st.rerun()

        except Exception as e:
            # Top-level boundary for the chat turn: surface the failure in
            # the UI rather than crashing the page.
            st.error(f"❌ Error: {e}")
131
+
# ------------------ Right Column: Structured Summary + FAQ (Button-based) ------------------
with right:
    st.subheader("📌 Summary & FAQ (from Structured Data)")

    col1, col2 = st.columns(2)
    show_summary = col1.button("📝 Load Summary")
    show_faq = col2.button("❓ Load FAQ")

    # Resolve the structured-data entry for the page currently shown, using
    # the three-digit page number at the end of the image URL.
    page_entry = None
    if st.session_state.image_url:
        match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
        if match:
            page_number = int(match.group(1))
            page_entry = next(
                (entry for entry in structured_data if entry.get("page_number") == page_number),
                None,
            )

    # Display Summary
    if show_summary:
        st.subheader("📝 Summary")
        if page_entry:
            # `or` also covers a summary key that is present but empty/None.
            st.markdown(page_entry.get("summary") or "No summary available.")
        else:
            # The button was already clicked, so say what is missing instead
            # of prompting for another click.
            st.info("No summary available for this page.")

    # Display FAQs
    if show_faq:
        st.subheader("❓ Auto-Generated FAQ")
        faq_list = []
        if page_entry:
            # Accept either key name for the list of questions.
            faq_list = page_entry.get("faqs", []) or page_entry.get("questions", [])
        if faq_list:
            for faq in faq_list:
                if isinstance(faq, dict):
                    st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
                else:
                    # Non-dict items are rendered as a bare question.
                    st.markdown(f"**Q:** {faq}")
        else:
            st.info("No FAQs available for this page.")