Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import json | |
import streamlit as st | |
import pandas as pd | |
def load_region_data(path):
    """
    Load the region lookup CSV into a DataFrame.

    Only empty fields are treated as missing values. pandas' default NA
    markers are disabled because they include the literal string "NA",
    which is also the ISO 3166-1 alpha-2 code for Namibia and would
    otherwise be silently turned into NaN.

    Args:
        path: Path (or any source accepted by ``pd.read_csv``) of the CSV.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(path, keep_default_na=False, na_values=[""])
def clean_country_code(country_str):
    """
    Normalize a country code string: trim surrounding whitespace and upper-case it.

    No length or alphabet validation is performed here; callers that need a
    two-letter code must check the length of the result themselves.

    Args:
        country_str: Raw country code text.

    Returns:
        The stripped, upper-cased string, or "" when ``country_str`` is not a
        string (e.g. ``None``), so length-based filtering by callers rejects it
        instead of raising ``AttributeError``.
    """
    if not isinstance(country_str, str):
        return ""
    return country_str.strip().upper()
def get_country_name(alpha2, region_df):
    """
    Given an alpha-2 code, return the English short name from region_df.

    Args:
        alpha2: Code compared for equality against the 'alpha-2' column.
        region_df: DataFrame with at least the 'alpha-2' and 'name' columns.

    Returns:
        The 'name' value of the first matching row that has a name. If no row
        matches, or the matching rows have no name, the original code is
        returned unchanged.
    """
    # dropna() keeps a missing name from leaking out as NaN; the documented
    # fallback (the code itself) is used instead.
    names = region_df.loc[region_df['alpha-2'] == alpha2, 'name'].dropna()
    if names.empty:
        return alpha2
    return names.iloc[0]
def get_regions(region_df):
    """
    Return the region table together with its distinct sub-region names.

    Args:
        region_df: DataFrame with a 'sub-region' column.

    Returns:
        A tuple ``(region_df, unique_sub_regions)`` where ``region_df`` is the
        same DataFrame object that was passed in and ``unique_sub_regions`` is
        a sorted list of the distinct, non-missing 'sub-region' values.
    """
    unique_sub_regions = sorted(region_df['sub-region'].dropna().unique())
    return region_df, unique_sub_regions
def _extract_raw_country_codes(country):
    """Turn a raw 'country' metadata value into a list of candidate code strings."""
    if country is None:
        return []
    if isinstance(country, (list, tuple, set)):
        return list(country)
    if not isinstance(country, str):
        return []
    text = country.strip()
    if not text.startswith("["):
        return [text]
    try:
        # The field may hold a Python-style list literal such as "['US', 'DE']";
        # swapping the quotes makes it parseable as JSON.
        parsed = json.loads(text.replace("'", '"'))
    except json.JSONDecodeError:
        return []
    return parsed if isinstance(parsed, list) else [parsed]


def get_country_name_and_region_mapping(
    _client,
    collection_name,
    region_df,
    _hybrid_search_fn,
    _clean_country_code_fn,
    _get_country_name_fn
):
    """
    Build a mapping from country name -> code and code -> sub-region, by scanning the entire collection.

    Args:
        _client: Client object forwarded unchanged to ``_hybrid_search_fn``.
        collection_name: Name of the collection to scan.
        region_df: DataFrame with 'alpha-2' and 'sub-region' columns.
        _hybrid_search_fn: Callable invoked as
            ``_hybrid_search_fn(_client, "", collection_name)``; its result is
            indexed with ``[0]`` and ``[1]``, each an iterable of hits that
            expose a ``payload`` mapping.
        _clean_country_code_fn: Callable that normalizes one raw code string.
        _get_country_name_fn: Callable invoked as ``fn(code, region_df)`` that
            returns the display name for a code.

    Returns:
        A tuple ``(country_name_to_code, iso_code_to_sub_region)`` of dicts.
        Codes absent from ``region_df`` get the sub-region "Not allocated".
    """
    # NOTE(review): the leading underscores on some parameters are presumably
    # there so a Streamlit cache decorator skips hashing them - confirm at the
    # decorator / call site, which is not visible here.
    results = _hybrid_search_fn(_client, "", collection_name)

    country_set = set()
    for hits in (results[0], results[1]):
        for res in hits:
            payload = res.payload or {}
            metadata = payload.get('metadata') or {}
            country = metadata.get('country', "[]")
            for raw_code in _extract_raw_country_codes(country):
                if not isinstance(raw_code, str):
                    continue
                code = _clean_country_code_fn(raw_code)
                # Only two-character results are accepted as alpha-2 codes.
                if len(code) == 2:
                    country_set.add(code)

    country_name_to_code = {}
    iso_code_to_sub_region = {}
    # Sorted iteration keeps the output (and the winner when two codes share
    # a name) deterministic across runs.
    for code in sorted(country_set):
        name = _get_country_name_fn(code, region_df)
        sub_region_row = region_df[region_df['alpha-2'] == code]
        if sub_region_row.empty:
            sub_region = "Not allocated"
        else:
            sub_region = sub_region_row['sub-region'].values[0]
        country_name_to_code[name] = code
        iso_code_to_sub_region[code] = sub_region
    return country_name_to_code, iso_code_to_sub_region