Update app.py
Browse files
app.py
CHANGED
@@ -38,11 +38,62 @@ for key in ['pdf_processed', 'markdown_texts', 'df']:
|
|
38 |
# ---------------------------------------------------------------------------------------
|
39 |
# API Configuration
|
40 |
# ---------------------------------------------------------------------------------------
|
41 |
-
API_URL = "https://api.stack-ai.com/inference/v0/run/2df89a6c-a4af-4576-880e-27058e498f02/67acad8b0603ba4631db38e7"
|
42 |
-
headers = {
|
43 |
-
|
44 |
-
|
45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# ---------------------------------------------------------------------------------------
|
48 |
# Survey Analysis Class
|
@@ -65,24 +116,43 @@ Meeting Notes:
|
|
65 |
{survey_response}
|
66 |
"""
|
67 |
|
68 |
-
def
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
|
80 |
def process_dataframe(self, df, topics):
|
81 |
results = []
|
82 |
for _, row in df.iterrows():
|
83 |
llm_input = self.prepare_llm_input(row['Document_Text'], topics)
|
84 |
-
|
85 |
-
response = self.query_api(payload)
|
86 |
notes = self.extract_meeting_notes(response)
|
87 |
results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
|
88 |
return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
|
@@ -183,10 +253,5 @@ if st.session_state['pdf_processed']:
|
|
183 |
topic_counts.plot.bar(ax=ax, color='#3d9aa1')
|
184 |
st.pyplot(fig)
|
185 |
|
186 |
-
if st.button("Reset / Upload New PDF"):
|
187 |
-
for key in ['pdf_processed', 'markdown_texts', 'df']:
|
188 |
-
st.session_state[key] = False if key == 'pdf_processed' else []
|
189 |
-
st.experimental_rerun()
|
190 |
-
|
191 |
if not uploaded_file:
|
192 |
st.info("Please upload a PDF file to begin.")
|
|
|
38 |
# ---------------------------------------------------------------------------------------
|
39 |
# API Configuration
|
40 |
# ---------------------------------------------------------------------------------------
|
41 |
+
# API_URL = "https://api.stack-ai.com/inference/v0/run/2df89a6c-a4af-4576-880e-27058e498f02/67acad8b0603ba4631db38e7"
|
42 |
+
# headers = {
|
43 |
+
# 'Authorization': 'Bearer <REDACTED_API_KEY>',  # credential redacted: never commit secrets; rotate this key and load it from the environment
|
44 |
+
# 'Content-Type': 'application/json'
|
45 |
+
# }
|
46 |
+
|
47 |
+
# The Hugging Face API key is read from the process environment so that no
# credential is stored in the source file.
hf_api_key = os.environ.get('HF_API_KEY')
if not hf_api_key:
    # Fail fast at start-up: an unset or empty key cannot be used.
    raise ValueError("HF_API_KEY not set in environment variables")

# Shared module-level inference client used by the analysis class.
client = InferenceClient(api_key=hf_api_key)
|
54 |
+
|
55 |
+
# # ---------------------------------------------------------------------------------------
|
56 |
+
# # Survey Analysis Class
|
57 |
+
# # ---------------------------------------------------------------------------------------
|
58 |
+
# class SurveyAnalysis:
|
59 |
+
# def prepare_llm_input(self, survey_response, topics):
|
60 |
+
# topic_descriptions = "\n".join([f"- **{t}**: {d}" for t, d in topics.items()])
|
61 |
+
# return f"""Extract and summarize PDF notes based on topics:
|
62 |
+
# {topic_descriptions}
|
63 |
+
|
64 |
+
# Instructions:
|
65 |
+
# - Extract exact quotes per topic.
|
66 |
+
# - Ignore irrelevant topics.
|
67 |
+
|
68 |
+
# Format:
|
69 |
+
# [Topic]
|
70 |
+
# - "Exact quote"
|
71 |
+
|
72 |
+
# Meeting Notes:
|
73 |
+
# {survey_response}
|
74 |
+
# """
|
75 |
+
|
76 |
+
# def query_api(self, payload):
|
77 |
+
# try:
|
78 |
+
# res = requests.post(API_URL, headers=headers, json=payload, timeout=60)
|
79 |
+
# res.raise_for_status()
|
80 |
+
# return res.json()
|
81 |
+
# except requests.exceptions.RequestException as e:
|
82 |
+
# st.error(f"API request failed: {e}")
|
83 |
+
# return {'outputs': {'out-0': ''}}
|
84 |
+
|
85 |
+
# def extract_meeting_notes(self, response):
|
86 |
+
# return response.get('outputs', {}).get('out-0', '')
|
87 |
+
|
88 |
+
# def process_dataframe(self, df, topics):
|
89 |
+
# results = []
|
90 |
+
# for _, row in df.iterrows():
|
91 |
+
# llm_input = self.prepare_llm_input(row['Document_Text'], topics)
|
92 |
+
# payload = {"user_id": "user", "in-0": llm_input}
|
93 |
+
# response = self.query_api(payload)
|
94 |
+
# notes = self.extract_meeting_notes(response)
|
95 |
+
# results.append({'Document_Text': row['Document_Text'], 'Topic_Summary': notes})
|
96 |
+
# return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)['Topic_Summary']], axis=1)
|
97 |
|
98 |
# ---------------------------------------------------------------------------------------
|
99 |
# Survey Analysis Class
|
|
|
116 |
{survey_response}
|
117 |
"""
|
118 |
|
119 |
+
def prompt_response_from_hf_llm(self, llm_input):
    """Send ``llm_input`` to the Hugging Face chat model and return its reply.

    The completion is requested in streaming mode and the streamed fragments
    are joined into a single string.

    Args:
        llm_input: Text sent as the user message.

    Returns:
        The complete model reply with leading/trailing whitespace stripped.
        An empty string is returned when the stream carries no text.
    """
    # System prompt that sets the persona, task, context and output format.
    system_prompt = (
        "\n"
        "<Persona> An expert Implementation Specialist at Michigan's Multi-Tiered System of Support Technical Assistance Center (MiMTSS TA Center) with deep expertise in SWPBIS, SEL, Structured Literacy, Science of Reading, and family engagement practices.</Persona>\n"
        "<Task> Analyze educational data and provide evidence-based recommendations for improving student outcomes across multiple tiers of support, drawing from established frameworks in behavioral interventions, literacy instruction, and family engagement.</Task>\n"
        "<Context> Operating within Michigan's educational system to support schools in implementing multi-tiered support systems, with access to student metrics data and knowledge of state-specific educational requirements and MTSS frameworks. </Context>\n"
        "<Format> Deliver insights through clear, actionable recommendations supported by data analysis, incorporating technical expertise while maintaining accessibility for educators and administrators at various levels of MTSS implementation.</Format>\n"
    )

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.1-70B-Instruct",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": llm_input},
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7,
    )

    # A streamed chunk may arrive without choices, or with a delta whose
    # content is None; concatenating those directly would raise, so they
    # are skipped and only real text fragments are collected.
    fragments = []
    for chunk in response:
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            fragments.append(piece)

    return "".join(fragments).strip()
|
147 |
+
|
148 |
+
def extract_text(self, response):
    """Return the model response unchanged.

    Args:
        response: The value produced by the LLM call.

    Returns:
        The same ``response`` object that was passed in.
    """
    text = response
    return text
|
150 |
|
151 |
def process_dataframe(self, df, topics):
    """Summarise every document in ``df`` and append the result as a column.

    For each value in the ``Document_Text`` column a prompt is built with
    ``prepare_llm_input``, sent through ``prompt_response_from_hf_llm`` and
    the reply is passed through ``extract_text``.

    Args:
        df: DataFrame containing a ``Document_Text`` column.
        topics: Topic definitions forwarded to ``prepare_llm_input``.

    Returns:
        A new DataFrame: ``df`` with its index reset, plus a
        ``Topic_Summary`` column holding one summary per row.
    """
    summaries = []
    for document_text in df['Document_Text']:
        llm_input = self.prepare_llm_input(document_text, topics)
        response = self.prompt_response_from_hf_llm(llm_input)
        # extract_text is the accessor that matches the Hugging Face reply.
        summaries.append(self.extract_text(response))

    # Building the frame from a dict keeps the 'Topic_Summary' column present
    # even when there are no rows.
    summary_frame = pd.DataFrame({'Topic_Summary': summaries})
    return pd.concat([df.reset_index(drop=True), summary_frame], axis=1)
|
|
|
253 |
topic_counts.plot.bar(ax=ax, color='#3d9aa1')
|
254 |
st.pyplot(fig)
|
255 |
|
|
|
|
|
|
|
|
|
|
|
256 |
if not uploaded_file:
|
257 |
st.info("Please upload a PDF file to begin.")
|