annikwag committed on
Commit
50281ac
·
verified ·
1 Parent(s): beb0dce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -32
app.py CHANGED
@@ -93,8 +93,8 @@ with col_about:
93
  """, unsafe_allow_html=True
94
  )
95
 
96
- # Main query input
97
- var = st.text_input("Enter Question")
98
 
99
  ###########################################
100
  # Create or load the embeddings collection
@@ -134,7 +134,8 @@ def reset_filters():
134
  st.session_state["crs_filter"] = "All/Not allocated"
135
  st.session_state["min_budget"] = min_budget_val
136
  st.session_state["client_filter"] = "All/Not allocated"
137
- # Optionally reset page number
 
138
  st.session_state["page"] = 1
139
 
140
  ###########################################
@@ -144,7 +145,6 @@ col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
144
 
145
  with col1:
146
  region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
147
-
148
  if region_filter == "All/Not allocated":
149
  filtered_country_names = unique_country_names
150
  else:
@@ -180,7 +180,6 @@ with col5:
180
  key="min_budget"
181
  )
182
 
183
-
184
  ###########################################
185
  # Filter Controls - Row 2 (Additional Filters)
186
  ###########################################
@@ -196,10 +195,29 @@ with col3_2:
196
  with col4_2:
197
  st.empty()
198
  with col5_2:
199
- st.button("Reset Filters", on_click=reset_filters)
 
200
 
201
-
202
- show_exact_matches = st.checkbox("Show only exact matches", value=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  ###########################################
205
  # Main Search / Results
@@ -259,7 +277,11 @@ else:
259
  except (ValueError, TypeError):
260
  return value
261
 
 
 
 
262
  # 3) Display results
 
263
  if show_exact_matches:
264
  st.write("Showing **Top Lexical Search results**")
265
  query_substring = var.strip().lower()
@@ -272,13 +294,31 @@ else:
272
  st.write('No exact matches, consider unchecking "Show only exact matches"')
273
  else:
274
  top_results = filtered_lexical_no_dupe # Show all matching lexical results
275
- for res in top_results:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  metadata = res.payload.get('metadata', {})
277
  if "title" not in metadata:
278
  metadata["title"] = compute_title(metadata)
279
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
280
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
281
- st.markdown(f"#### {title_clean}", unsafe_allow_html=True)
 
282
 
283
  objective = metadata.get("objective", "None")
284
  desc_en = metadata.get("description.en", "").strip()
@@ -297,7 +337,6 @@ else:
297
  if remainder_text:
298
  with st.expander("Show more"):
299
  st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
300
-
301
  with col_right:
302
  start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
303
  end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
@@ -311,21 +350,37 @@ else:
311
  new_crs_value = lookup_crs_value(crs_key_clean)
312
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
313
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
314
-
 
 
 
 
 
 
 
 
 
315
  additional_text = (
316
  f"**Objective:** {highlight_query(objective, var)}<br>"
317
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
318
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
319
- f"**Budget:** Project: {formatted_project_budget}, Total volume: {formatted_total_volume}<br>"
320
- f"**Country:** {country_raw}<br>"
 
321
  f"**Sector:** {crs_combined}"
322
  )
323
  contact = metadata.get("contact", "").strip()
324
  if contact and contact.lower() != "[email protected]":
325
  additional_text += f"<br>**Contact:** [email protected]"
326
  st.markdown(additional_text, unsafe_allow_html=True)
327
-
328
  st.divider()
 
 
 
 
 
 
 
329
  else:
330
  if not filtered_semantic_no_dupe:
331
  st.write("No relevant results found.")
@@ -334,30 +389,43 @@ else:
334
  total_results = len(filtered_semantic_no_dupe)
335
  total_pages = (total_results - 1) // page_size + 1
336
 
337
- # Use session_state for page selection; default to 1 if not set.
338
  if "page" not in st.session_state:
339
  st.session_state.page = 1
340
  current_page = st.session_state.page
341
 
342
- start_index = (current_page - 1) * page_size
 
 
 
 
 
343
  end_index = start_index + page_size
344
  top_results = filtered_semantic_no_dupe[start_index:end_index]
345
 
346
- st.write(f"Showing **{len(top_results)}** Semantic Search results (Page {current_page} of {total_pages})")
 
 
 
347
 
 
348
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
349
- st.markdown(
350
- f"<div style='background-color: #f0f0f0; color: #333; padding: 10px; border-radius: 5px; font-size:1.2em; text-align:center;'>{rag_answer}</div>",
351
- unsafe_allow_html=True
352
- )
 
 
 
 
353
  st.divider()
354
 
355
- for res in top_results:
356
  metadata = res.payload.get('metadata', {})
357
  if "title" not in metadata:
358
  metadata["title"] = compute_title(metadata)
359
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
360
- st.markdown(f"#### {title_clean}")
 
361
 
362
  desc_en = metadata.get("description.en", "").strip()
363
  desc_de = metadata.get("description.de", "").strip()
@@ -376,7 +444,6 @@ else:
376
  if remainder_text:
377
  with st.expander("Show more"):
378
  st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
379
-
380
  with col_right:
381
  start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
382
  end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
@@ -390,22 +457,31 @@ else:
390
  new_crs_value = lookup_crs_value(crs_key_clean)
391
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
392
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
393
-
 
 
 
 
 
 
 
 
394
  additional_text = (
395
  f"**Objective:** {metadata.get('objective', '')}<br>"
396
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
397
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
398
- f"**Budget:** Project: {formatted_project_budget}, Total volume: {formatted_total_volume}<br>"
399
- f"**Country:** {country_raw}<br>"
 
400
  f"**Sector:** {crs_combined}"
401
  )
402
  contact = metadata.get("contact", "").strip()
403
  if contact and contact.lower() != "[email protected]":
404
  additional_text += f"<br>**Contact:** [email protected]"
405
  st.markdown(additional_text, unsafe_allow_html=True)
406
-
407
  st.divider()
408
 
409
- # Pagination widget moved to the very end of the page
410
- new_page = st.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page")
411
- # The selected page value automatically updates st.session_state["page"]
 
 
93
  """, unsafe_allow_html=True
94
  )
95
 
96
+ # Main query input (with a key so we can reset it)
97
+ var = st.text_input("Enter Question", key="query")
98
 
99
  ###########################################
100
  # Create or load the embeddings collection
 
134
  st.session_state["crs_filter"] = "All/Not allocated"
135
  st.session_state["min_budget"] = min_budget_val
136
  st.session_state["client_filter"] = "All/Not allocated"
137
+ st.session_state["query"] = ""
138
+ st.session_state["show_exact_matches"] = False
139
  st.session_state["page"] = 1
140
 
141
  ###########################################
 
145
 
146
  with col1:
147
  region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
 
148
  if region_filter == "All/Not allocated":
149
  filtered_country_names = unique_country_names
150
  else:
 
180
  key="min_budget"
181
  )
182
 
 
183
  ###########################################
184
  # Filter Controls - Row 2 (Additional Filters)
185
  ###########################################
 
195
  with col4_2:
196
  st.empty()
197
  with col5_2:
198
+ # Plain reset button (will be moved to row 3 as well)
199
+ st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
200
 
201
+ ###########################################
202
+ # Filter Controls - Row 3 (Remaining Filter)
203
+ ###########################################
204
+ col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
205
+ with col1_3:
206
+ # Place the "Show only exact matches" checkbox here
207
+ show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
208
+ with col2_3:
209
+ st.empty()
210
+ with col3_3:
211
+ st.empty()
212
+ with col4_3:
213
+ st.empty()
214
+ with col5_3:
215
+ # Right-align a more prominent reset button
216
+ with st.container():
217
+ st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
218
+ if st.button("**Reset Filters**", key="reset_button_row3"):
219
+ reset_filters()
220
+ st.markdown("</div>", unsafe_allow_html=True)
221
 
222
  ###########################################
223
  # Main Search / Results
 
277
  except (ValueError, TypeError):
278
  return value
279
 
280
+ # --- Reprint Query (Right Aligned with "Query:") ---
281
+ st.markdown(f"<div style='text-align: right; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
282
+
283
  # 3) Display results
284
+ # Lexical Search Results Branch
285
  if show_exact_matches:
286
  st.write("Showing **Top Lexical Search results**")
287
  query_substring = var.strip().lower()
 
294
  st.write('No exact matches, consider unchecking "Show only exact matches"')
295
  else:
296
  top_results = filtered_lexical_no_dupe # Show all matching lexical results
297
+
298
+ # --- Pagination (Above Lexical Results) ---
299
+ page_size = 15
300
+ total_results = len(top_results)
301
+ total_pages = (total_results - 1) // page_size + 1
302
+ if "page" not in st.session_state:
303
+ st.session_state.page = 1
304
+ current_page = st.session_state.page
305
+ # Top pagination widget (right aligned, 1/7 width)
306
+ col_pag_top = st.columns([6, 1])[1]
307
+ new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
308
+ st.session_state.page = new_page_top
309
+
310
+ start_index = (st.session_state.page - 1) * page_size
311
+ end_index = start_index + page_size
312
+ paged_results = top_results[start_index:end_index]
313
+
314
+ for i, res in enumerate(paged_results, start=start_index+1):
315
  metadata = res.payload.get('metadata', {})
316
  if "title" not in metadata:
317
  metadata["title"] = compute_title(metadata)
318
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
319
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
320
+ # Prepend the result number
321
+ st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
322
 
323
  objective = metadata.get("objective", "None")
324
  desc_en = metadata.get("description.en", "").strip()
 
337
  if remainder_text:
338
  with st.expander("Show more"):
339
  st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
 
340
  with col_right:
341
  start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
342
  end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
 
350
  new_crs_value = lookup_crs_value(crs_key_clean)
351
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
352
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
353
+
354
+ # Insert Predecessor/Successor line if available
355
+ predecessor = metadata.get("predecessor_id", "").strip()
356
+ successor = metadata.get("successor_id", "").strip()
357
+ extra_line = ""
358
+ if predecessor:
359
+ extra_line += f"<br>**Predecessor Project:** {predecessor}"
360
+ if successor:
361
+ extra_line += f"<br>**Successor Project:** {successor}"
362
+
363
  additional_text = (
364
  f"**Objective:** {highlight_query(objective, var)}<br>"
365
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
366
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
367
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
368
+ + extra_line +
369
+ f"<br>**Country:** {country_raw}<br>"
370
  f"**Sector:** {crs_combined}"
371
  )
372
  contact = metadata.get("contact", "").strip()
373
  if contact and contact.lower() != "[email protected]":
374
  additional_text += f"<br>**Contact:** [email protected]"
375
  st.markdown(additional_text, unsafe_allow_html=True)
 
376
  st.divider()
377
+
378
+ # Bottom pagination widget
379
+ col_pag_bot = st.columns([6, 1])[1]
380
+ new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot")
381
+ st.session_state.page = new_page_bot
382
+
383
+ # Semantic Search Results Branch
384
  else:
385
  if not filtered_semantic_no_dupe:
386
  st.write("No relevant results found.")
 
389
  total_results = len(filtered_semantic_no_dupe)
390
  total_pages = (total_results - 1) // page_size + 1
391
 
 
392
  if "page" not in st.session_state:
393
  st.session_state.page = 1
394
  current_page = st.session_state.page
395
 
396
+ # Top pagination widget (right aligned, 1/7 width)
397
+ col_pag_top = st.columns([6, 1])[1]
398
+ new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
399
+ st.session_state.page = new_page_top
400
+
401
+ start_index = (st.session_state.page - 1) * page_size
402
  end_index = start_index + page_size
403
  top_results = filtered_semantic_no_dupe[start_index:end_index]
404
 
405
+ # Prominent page info with bold numbers and green highlight if current page is not 1
406
+ page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
407
+ total_pages_str = f"<b>{total_pages}</b>"
408
+ st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
409
 
410
+ # --- RAG Answer (Right aligned, bullet points, bold numbers) ---
411
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
412
+ bullet_lines = []
413
+ for line in rag_answer.splitlines():
414
+ if line.strip():
415
+ # Bold any numbers in the line
416
+ line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
417
+ bullet_lines.append(f"<li>{line_bold}</li>")
418
+ formatted_rag_answer = "<ul style='text-align: right; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
419
+ st.markdown(formatted_rag_answer, unsafe_allow_html=True)
420
  st.divider()
421
 
422
+ for i, res in enumerate(top_results, start=start_index+1):
423
  metadata = res.payload.get('metadata', {})
424
  if "title" not in metadata:
425
  metadata["title"] = compute_title(metadata)
426
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
427
+ # Prepend result number and make title bold
428
+ st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
429
 
430
  desc_en = metadata.get("description.en", "").strip()
431
  desc_de = metadata.get("description.de", "").strip()
 
444
  if remainder_text:
445
  with st.expander("Show more"):
446
  st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
 
447
  with col_right:
448
  start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
449
  end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
 
457
  new_crs_value = lookup_crs_value(crs_key_clean)
458
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
459
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
460
+
461
+ predecessor = metadata.get("predecessor_id", "").strip()
462
+ successor = metadata.get("successor_id", "").strip()
463
+ extra_line = ""
464
+ if predecessor:
465
+ extra_line += f"<br>**Predecessor Project:** {predecessor}"
466
+ if successor:
467
+ extra_line += f"<br>**Successor Project:** {successor}"
468
+
469
  additional_text = (
470
  f"**Objective:** {metadata.get('objective', '')}<br>"
471
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
472
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
473
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
474
+ + extra_line +
475
+ f"<br>**Country:** {country_raw}<br>"
476
  f"**Sector:** {crs_combined}"
477
  )
478
  contact = metadata.get("contact", "").strip()
479
  if contact and contact.lower() != "[email protected]":
480
  additional_text += f"<br>**Contact:** [email protected]"
481
  st.markdown(additional_text, unsafe_allow_html=True)
 
482
  st.divider()
483
 
484
+ # Bottom pagination widget (right aligned, 1/7 width)
485
+ col_pag_bot = st.columns([6, 1])[1]
486
+ new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
487
+ st.session_state.page = new_page_bot