Spaces:

ProfessorLeVesseur
/

PDF_Topic_Extraction_Analysis_App

Running

App Files Community

ProfessorLeVesseur committed 21 days ago

Commit

5fed4e8

verified ·

1 Parent(s): 39b1f14

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -22

app.py CHANGED Viewed

@@ -38,11 +38,62 @@ for key in ['pdf_processed', 'markdown_texts', 'df']:
 # ---------------------------------------------------------------------------------------
 # API Configuration
 # ---------------------------------------------------------------------------------------
-API_URL = "https://api.stack-ai.com/inference/v0/run/2df89a6c-a4af-4576-880e-27058e498f02/67acad8b0603ba4631db38e7"
-headers = {
-    'Authorization': 'Bearer a9e4979e-cdbe-49ea-a193-53562a784805',
-    'Content-Type': 'application/json'
-}
 # ---------------------------------------------------------------------------------------
 # Survey Analysis Class
@@ -65,24 +116,43 @@ Meeting Notes:
 {survey_response}
 """
-    def query_api(self, payload):
-        try:
-            res = requests.post(API_URL, headers=headers, json=payload, timeout=60)
-            res.raise_for_status()
-            return res.json()
-        except requests.exceptions.RequestException as e:
-            st.error(f"API request failed: {e}")
-            return {'outputs': {'out-0': ''}}
-    def extract_meeting_notes(self, response):
-        return response.get('outputs', {}).get('out-0', '')
     def process_dataframe(self, df, topics):
         results = []
         for _, row in df.iterrows():
             llm_input = self.prepare_llm_input(row['Document_Text'], topics)
-            payload = {"user_id": "user", "in-0": llm_input}
-            response = self.query_api(payload)
             notes = self.extract_meeting_notes(response)
             results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
         return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
@@ -183,10 +253,5 @@ if st.session_state['pdf_processed']:
         topic_counts.plot.bar(ax=ax, color='#3d9aa1')
         st.pyplot(fig)
-if st.button("Reset / Upload New PDF"):
-    for key in ['pdf_processed', 'markdown_texts', 'df']:
-        st.session_state[key] = False if key == 'pdf_processed' else []
-    st.experimental_rerun()
 if not uploaded_file:
     st.info("Please upload a PDF file to begin.")

 # ---------------------------------------------------------------------------------------
 # API Configuration
 # ---------------------------------------------------------------------------------------
+# API_URL = "https://api.stack-ai.com/inference/v0/run/2df89a6c-a4af-4576-880e-27058e498f02/67acad8b0603ba4631db38e7"
+# headers = {
+#     'Authorization': 'Bearer a9e4979e-cdbe-49ea-a193-53562a784805',
+#     'Content-Type': 'application/json'
+# }
+# Retrieve Hugging Face API key from environment variables
+hf_api_key = os.getenv('HF_API_KEY')
+if not hf_api_key:
+    raise ValueError("HF_API_KEY not set in environment variables")
+# Create the Hugging Face inference client
+client = InferenceClient(api_key=hf_api_key)
+# # ---------------------------------------------------------------------------------------
+# # Survey Analysis Class
+# # ---------------------------------------------------------------------------------------
+# class SurveyAnalysis:
+#     def prepare_llm_input(self, survey_response, topics):
+#         topic_descriptions = "\n".join([f"- **{t}**: {d}" for t, d in topics.items()])
+#         return f"""Extract and summarize PDF notes based on topics:
+# {topic_descriptions}
+# Instructions:
+# - Extract exact quotes per topic.
+# - Ignore irrelevant topics.
+# Format:
+# [Topic]
+# - "Exact quote"
+# Meeting Notes:
+# {survey_response}
+# """
+#     def query_api(self, payload):
+#         try:
+#             res = requests.post(API_URL, headers=headers, json=payload, timeout=60)
+#             res.raise_for_status()
+#             return res.json()
+#         except requests.exceptions.RequestException as e:
+#             st.error(f"API request failed: {e}")
+#             return {'outputs': {'out-0': ''}}
+#     def extract_meeting_notes(self, response):
+#         return response.get('outputs', {}).get('out-0', '')
+#     def process_dataframe(self, df, topics):
+#         results = []
+#         for _, row in df.iterrows():
+#             llm_input = self.prepare_llm_input(row['Document_Text'], topics)
+#             payload = {"user_id": "user", "in-0": llm_input}
+#             response = self.query_api(payload)
+#             notes = self.extract_meeting_notes(response)
+#             results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
+#         return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
 # ---------------------------------------------------------------------------------------
 # Survey Analysis Class
 {survey_response}
 """
+    def prompt_response_from_hf_llm(self, llm_input):
+        # Define a system prompt to guide the model's responses
+        system_prompt = """
+        <Persona> An expert Implementation Specialist at Michigan's Multi-Tiered System of Support Technical Assistance Center (MiMTSS TA Center) with deep expertise in SWPBIS, SEL, Structured Literacy, Science of Reading, and family engagement practices.</Persona>
+        <Task> Analyze educational data and provide evidence-based recommendations for improving student outcomes across multiple tiers of support, drawing from established frameworks in behavioral interventions, literacy instruction, and family engagement.</Task>
+        <Context> Operating within Michigan's educational system to support schools in implementing multi-tiered support systems, with access to student metrics data and knowledge of state-specific educational requirements and MTSS frameworks. </Context>
+        <Format> Deliver insights through clear, actionable recommendations supported by data analysis, incorporating technical expertise while maintaining accessibility for educators and administrators at various levels of MTSS implementation.</Format>
+        """
+        # Generate the refined prompt using Hugging Face API
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-3.1-70B-Instruct",
+            messages=[
+                {"role": "system", "content": system_prompt},  # Add system prompt here
+                {"role": "user", "content": llm_input}
+            ],
+            stream=True,
+            temperature=0.5,
+            max_tokens=1024,
+            top_p=0.7
+        )
+        # Combine messages if response is streamed
+        response_content = ""
+        for message in response:
+            response_content += message.choices[0].delta.content
+        return response_content.strip()
+    def extract_text(self, response):
+        return response
     def process_dataframe(self, df, topics):
         """Summarize every document in ``df`` and append the summaries as a column.

         For each value in the ``Document_Text`` column a prompt is built with
         ``prepare_llm_input``, sent through ``prompt_response_from_hf_llm`` and
         the reply text is taken via ``extract_text``.

         Args:
             df: DataFrame with a ``Document_Text`` column.
             topics: Topic definitions forwarded to ``prepare_llm_input``.

         Returns:
             A new DataFrame: ``df`` with its index reset, plus a
             ``Topic_Summary`` column holding one summary per row.
         """
         summaries = []
         for document_text in df['Document_Text']:
             llm_input = self.prepare_llm_input(document_text, topics)
             response = self.prompt_response_from_hf_llm(llm_input)
             # The reply is already plain text; use extract_text rather than
             # the removed extract_meeting_notes helper for the old API payload.
             summaries.append(self.extract_text(response))
         # A named Series (instead of indexing a DataFrame built from a list of
         # dicts) also works when there are no rows at all.
         summary_column = pd.Series(summaries, name='Topic_Summary', dtype=object)
         return pd.concat([df.reset_index(drop=True), summary_column], axis=1)
         topic_counts.plot.bar(ax=ax, color='#3d9aa1')
         st.pyplot(fig)
 if not uploaded_file:
     st.info("Please upload a PDF file to begin.")