Spaces:

IAMTFRMZA
/

documentaitest

Running

App Files Community

IAMTFRMZA committed 16 days ago

Commit

8c4492e

verified ·

1 Parent(s): e1d9b68

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -34

app.py CHANGED Viewed

@@ -1,44 +1,170 @@
 import json
-# Load the structured pathology JSON file
-@st.cache_data
-def load_data():
-    with open("51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output (1).json", "r") as f:
-        return json.load(f)
-data = load_data()
-st.title("📘 Surgical Pathology Manual - Page Summary & FAQ")
-# Get available pages
-page_numbers = sorted({int(entry["page"]) for entry in data if "page" in entry})
-selected_page = st.selectbox("Select Page Number", page_numbers)
-# Filter content for the selected page
-page_content = [entry for entry in data if int(entry.get("page", -1)) == selected_page]
-if page_content:
-    for section in page_content:
-        section_title = section.get("section_heading", "Untitled Section")
-        summary = section.get("summary")
-        faq = section.get("faq")
-        st.markdown(f"### 🧠 Section: {section_title}")
-        if summary:
-            st.markdown("#### 🔍 Summary")
-            st.write(summary)
-        else:
-            st.info("No summary available for this section.")
-        if faq:
-            st.markdown("#### ❓ FAQ")
-            for qna in faq:
-                question = qna.get("question", "")
-                answer = qna.get("answer", "")
-                st.markdown(f"**Q:** {question}")
-                st.markdown(f"**A:** {answer}")
         else:
-            st.info("No FAQs available for this section.")
-else:
-    st.warning("No content found for the selected page.")

+import streamlit as st
+import os
+import time
+import re
 import json
+import requests
+from PIL import Image
+from openai import OpenAI
+from io import BytesIO
+# ------------------ App Configuration ------------------
+st.set_page_config(page_title="Document AI Assistant", layout="wide")
+st.title("📄 Document AI Assistant")
+st.caption("Chat with an AI Assistant on your medical/pathology documents")
+# ------------------ Load API Key and Assistant ID ------------------
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
+if not OPENAI_API_KEY or not ASSISTANT_ID:
+    st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
+    st.stop()
+client = OpenAI(api_key=OPENAI_API_KEY)
+# ------------------ Load Structured JSON ------------------
+STRUCTURED_JSON_PATH = "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"
+try:
+    with open(STRUCTURED_JSON_PATH, "r") as f:
+        structured_data = json.load(f)
+except Exception as e:
+    st.error(f"❌ Failed to load structured summary file: {e}")
+    st.stop()
+# ------------------ Session State Initialization ------------------
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "thread_id" not in st.session_state:
+    st.session_state.thread_id = None
+if "image_url" not in st.session_state:
+    st.session_state.image_url = None
+if "image_updated" not in st.session_state:
+    st.session_state.image_updated = False
+# ------------------ Sidebar Controls ------------------
+st.sidebar.header("🔧 Settings")
+if st.sidebar.button("🔄 Clear Chat"):
+    st.session_state.messages = []
+    st.session_state.thread_id = None
+    st.session_state.image_url = None
+    st.session_state.image_updated = False
+    st.rerun()
+show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
+# ------------------ Layout ------------------
+left, center, right = st.columns([1, 2, 1])
+# ------------------ Left Column: Document Image ------------------
+with left:
+    st.subheader("📄 Document Image")
+    if show_image and st.session_state.image_url:
+        try:
+            image = Image.open(requests.get(st.session_state.image_url, stream=True).raw)
+            st.image(image, caption="📑 Extracted Page", use_container_width=True)
+            st.session_state.image_updated = False
+        except Exception as e:
+            st.warning("⚠️ Could not load image.")
+# ------------------ Center Column: Chat UI ------------------
+with center:
+    st.subheader("💬 Document AI Assistant")
+    for message in st.session_state.messages:
+        role, content = message["role"], message["content"]
+        st.chat_message(role).write(content)
+    if prompt := st.chat_input("Type your question about the document..."):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        st.chat_message("user").write(prompt)
+        try:
+            if st.session_state.thread_id is None:
+                thread = client.beta.threads.create()
+                st.session_state.thread_id = thread.id
+            thread_id = st.session_state.thread_id
+            client.beta.threads.messages.create(
+                thread_id=thread_id,
+                role="user",
+                content=prompt
+            )
+            run = client.beta.threads.runs.create(
+                thread_id=thread_id,
+                assistant_id=ASSISTANT_ID
+            )
+            with st.spinner("Assistant is thinking..."):
+                while True:
+                    run_status = client.beta.threads.runs.retrieve(
+                        thread_id=thread_id,
+                        run_id=run.id
+                    )
+                    if run_status.status == "completed":
+                        break
+                    time.sleep(1)
+            messages = client.beta.threads.messages.list(thread_id=thread_id)
+            assistant_message = None
+            for message in reversed(messages.data):
+                if message.role == "assistant":
+                    assistant_message = message.content[0].text.value
+                    break
+            st.chat_message("assistant").write(assistant_message)
+            st.session_state.messages.append({"role": "assistant", "content": assistant_message})
+            # Extract GitHub image URL
+            image_match = re.search(
+                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
+                assistant_message
+            )
+            if image_match:
+                st.session_state.image_url = image_match.group(0)
+                st.session_state.image_updated = True
+                st.rerun()
+        except Exception as e:
+            st.error(f"❌ Error: {str(e)}")
+# ------------------ Right Column: Structured Summary + FAQ (Button-based) ------------------
+with right:
+    st.subheader("📌 Summary & FAQ (from Structured Data)")
+    col1, col2 = st.columns(2)
+    show_summary = col1.button("📝 Load Summary")
+    show_faq = col2.button("❓ Load FAQ")
+    summary_text = "Click the button to load summary."
+    faq_list = []
+    if st.session_state.image_url:
+        match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
+        if match:
+            page_number = int(match.group(1))
+            page_entry = next((entry for entry in structured_data if entry.get("page_number") == page_number), None)
+            if page_entry:
+                if show_summary:
+                    summary_text = page_entry.get("summary", "No summary available.")
+                if show_faq:
+                    faq_list = page_entry.get("faqs", []) or page_entry.get("questions", [])
+    # Display Summary
+    if show_summary:
+        st.subheader("📝 Summary")
+        st.markdown(summary_text)
+    # Display FAQs
+    if show_faq:
+        st.subheader("❓ Auto-Generated FAQ")
+        if faq_list:
+            for faq in faq_list:
+                if isinstance(faq, dict):
+                    st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
+                else:
+                    st.markdown(f"**Q:** {faq}")
         else:
+            st.info("No FAQs available for this page.")