Update app.py
app.py CHANGED
@@ -95,23 +95,23 @@ else:
 with col2:
     country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names)  # Display filtered country names

-#
-
-
-
-
-#
-
-
-
-
-
-
+# Year range slider  # ToDo add end_year filter again
+with col3:
+    current_year = datetime.now().year
+    default_start_year = current_year - 5
+
+    # 3) The max_value is now the actual max end_year from collection
+    end_year_range = st.slider(
+        "Project End Year",
+        min_value=2010,
+        max_value=max_end_year,
+        value=(default_start_year, max_end_year),
+    )

 # Checkbox to control whether to show only exact matches
 show_exact_matches = st.checkbox("Show only exact matches", value=False)

-def filter_results(results, country_filter, region_filter): ## , end_year_range
+def filter_results(results, country_filter, region_filter, end_year_range): ## ToDo add end_year filter again
     filtered = []
     for r in results:
         metadata = r.payload.get('metadata', {})
@@ -146,7 +146,7 @@ def filter_results(results, country_filter, region_filter): ## , end_year_range
         if (
             (country_filter == "All/Not allocated" or selected_iso_code in c_list)
             and (region_filter == "All/Not allocated" or countries_in_region)
-
+            and (end_year_range[0] <= end_year_val <= end_year_range[1])  # ToDo add end_year filter again
         ):
             filtered.append(r)
     return filtered
@@ -172,8 +172,8 @@ lexical_all = [
 semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]

 # 2) Filter the entire sets
-filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter)
-filtered_lexical = filter_results(lexical_all, country_filter, region_filter)
+filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter, end_year_range)  ## ToDo add end_year filter again
+filtered_lexical = filter_results(lexical_all, country_filter, region_filter, end_year_range)  ## ToDo add end_year filter again

 filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)  # ToDo remove duplicates again?
 filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -205,8 +205,8 @@ if show_exact_matches:

     # 3) Now apply your region/country/year filter on that new list
     filtered_lexical = filter_results(
-        lexical_substring_filtered, country_filter, region_filter
-    ) ##
+        lexical_substring_filtered, country_filter, region_filter, end_year_range
+    )  ## ToDo add end_year filter again

     # 4) Remove duplicates
     filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -277,8 +277,10 @@ if show_exact_matches:

         additional_text = (
             f"Commissioned by **{client_name}**\n"
+
             f"Projekt duration **{start_year_str}-{end_year_str}**\n"
-
+
+            f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
         )
         st.markdown(additional_text)
         st.divider()
@@ -291,32 +293,37 @@ else:
 else:
     # Show the top 15 from filtered_semantic
     for res in filtered_semantic_no_dupe[:15]:
+        # Metadata
+        metadata = res.payload.get('metadata', {})
+        countries = metadata.get('countries', "[]")
+        client_name = metadata.get('client', 'Unknown Client')
+        start_year = metadata.get('start_year', None)
+        end_year = metadata.get('end_year', None)
+        total_volume = metadata.get('total_volume', "Unknown")
+        total_project = metadata.get('total_project', "Unknown")
+        id = metadata.get('id', "Unknown")
         project_name = res.payload['metadata'].get('project_name', 'Project Link')
-
-        st.markdown(f"####
+        proj_id = metadata.get('id', 'Unknown')
+        st.markdown(f"#### {project_name} [{proj_id}]")

-        # Snippet logic
-
-
-
+        # Snippet logic (80 words)
+        # Build snippet from objectives and descriptions.
+        objectives = metadata.get("objectives", "")
+        desc_de = metadata.get("description.de", "")
+        desc_en = metadata.get("description.en", "")
+        description = desc_de if desc_de else desc_en
+        full_snippet = f"Objective: {objectives} Description: {description}"
+        words = full_snippet.split()
+        preview_word_count = 200
         preview_text = " ".join(words[:preview_word_count])
         remainder_text = " ".join(words[preview_word_count:])
         st.write(preview_text + ("..." if remainder_text else ""))

         # Keywords
+        full_text = res.payload['page_content']
         top_keywords = extract_top_keywords(full_text, top_n=5)
         if top_keywords:
             st.markdown(f"_{' · '.join(top_keywords)}_")
-
-        # Metadata
-        metadata = res.payload.get('metadata', {})
-        countries = metadata.get('countries', "[]")
-        client_name = metadata.get('client', 'Unknown Client')
-        start_year = metadata.get('start_year', None)
-        end_year = metadata.get('end_year', None)
-        total_volume = metadata.get('total_volume', "Unknown")
-        total_project = metadata.get('total_project', "Unknown")
-        id = metadata.get('id', "Unknown")

         try:
             c_list = json.loads(countries.replace("'", '"'))
@@ -339,16 +346,27 @@ else:
         # Format the year range
         start_year_str = extract_year(start_year) if start_year else "Unknown"
         end_year_str = extract_year(end_year) if end_year else "Unknown"
+
+        formatted_project_budget = format_currency(total_project)
+        formatted_total_volume = format_currency(total_volume)
+

         # Build the final string
         if matched_countries:
             additional_text = (
-                f"**{', '.join(matched_countries)}
-
+                f"**{', '.join(matched_countries)}**\n" f"Commissioned by **{client_name}**\n"
+
+                f"Projekt duration **{start_year_str}-{end_year_str}**\n"
+
+                f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
             )
         else:
             additional_text = (
-                f"Commissioned by **{client_name}
+                f"Commissioned by **{client_name}**\n"
+
+                f"Projekt duration **{start_year_str}-{end_year_str}**\n"
+
+                f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
             )


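
Note on the end-year additions: two names used by the new code, max_end_year (the slider's upper bound, described in the comment as the largest end year in the collection) and end_year_val (the per-record year that the new filter_results condition checks), are defined in parts of app.py that this diff does not show. Below is a minimal, self-contained sketch of how such values could be derived; the helper names, the regex-based year parsing, and the fallback behaviour are illustrative assumptions, not the app's actual code.

import re
from datetime import datetime

# Hypothetical year parsing (illustration only; the app has its own extract_year helper).
def parse_year(raw, fallback):
    m = re.search(r'(19|20)\d{2}', str(raw or ''))
    return int(m.group(0)) if m else fallback

# Possible source of the slider's upper bound: the largest parseable end_year across
# the loaded records (assumes each record carries a .payload dict, as used in the diff).
def compute_max_end_year(records):
    current = datetime.now().year
    years = [parse_year(r.payload.get('metadata', {}).get('end_year'), current) for r in records]
    return max(years) if years else current

# The per-record value checked by the new condition in filter_results:
#   end_year_range[0] <= end_year_val <= end_year_range[1]
end_year_range = (2019, 2025)
end_year_val = parse_year('2021-12-31', fallback=2025)
print(end_year_range[0] <= end_year_val <= end_year_range[1])  # True

In this sketch, a record whose end_year cannot be parsed falls back to the slider maximum and therefore still passes the filter; the real app may handle that case differently.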
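The new budget lines call format_currency(total_project) and format_currency(total_volume); that helper is defined elsewhere in app.py and not shown here. The following is a hypothetical stand-in, purely to illustrate the kind of formatting the budget strings expect; the EUR suffix, the thousands separator, and the pass-through for non-numeric values such as the "Unknown" default are assumptions.

# Hypothetical stand-in for the app's format_currency helper (its real definition is not in this diff).
def format_currency(value) -> str:
    try:
        amount = float(str(value).replace(',', ''))
    except (TypeError, ValueError):
        return str(value)            # non-numeric metadata such as "Unknown" is shown as-is
    return f"{amount:,.0f} EUR"      # assumption: euro amounts with thousands separators

print(format_currency(1250000))      # 1,250,000 EUR
print(format_currency("Unknown"))    # Unknown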