import streamlit as st
from spacy import displacy
import spacy
import geospacy
from PIL import Image
import base64
import sys
import pandas as pd
import en_core_web_md
from spacy.tokens import Span, Doc, Token
from utils import geoutil
from utils import llm_coding
import urllib.parse
import json
# Highlight colour used by displaCy for each spatial entity label.
colors = {
    "GPE": "#43c6fc",
    "LOC": "#fd9720",
    "RSE": "#a6e22d",
}
# Default displaCy options: highlight every label that has a colour.
options = {"ents": list(colors), "colors": colors}

# Template the rendered entity markup is substituted into before display.
HTML_WRAPPER = """
{}
"""

# Sidebar state shared between the functions below via ``global``.
model = ""
gpe_selected = "GPE"
loc_selected = "LOC"
rse_selected = "RSE"
# Letters of the ticked labels: "g" (GPE), "l" (LOC), "r" (RSE).
types = ""

# Prefix of the "Locate" links; an empty prefix yields relative links.
# For a local server use e.g. "http://localhost:8080/".
BASE_URL = ""
def set_header():
    """Render the page header through two raw-HTML ``st.markdown`` calls.

    NOTE(review): ``set_header`` is defined a second time further down in
    this module; that later definition replaces this one, so this version
    is not the one ``main`` ends up calling.
    """
    LOGO_IMAGE = "tetis-1.png"  # not referenced by the markup below
    leading_markup = """
"""
    title_markup = f"""
GeOspaCy
"""
    # Both fragments are emitted as raw HTML, in this order.
    for markup in (leading_markup, title_markup):
        st.markdown(markup, unsafe_allow_html=True)
def set_side_menu():
    """Render the sidebar: spaCy model picker and entity-label checkboxes.

    Initial widget values come from the ``model`` and ``type`` query
    parameters. The chosen model is stored in the global ``model``; the
    ticked labels are recorded in the global ``types`` as a string made of
    ``g`` (GPE), ``l`` (LOC) and ``r`` (RSE).

    NOTE(review): ``set_side_menu`` is defined again further down in this
    module, and that later definition replaces this one.
    NOTE(review): the ``*_selected`` globals are only ever assigned their
    label here and never cleared, so unticking a box does not remove that
    label from them -- confirm whether that is intended.
    """
    global gpe_selected, loc_selected, rse_selected, model, types
    types = ""
    params = st.query_params

    st.sidebar.markdown("## Spacy Model")
    st.sidebar.markdown("You can **select** the values of the *spacy model* from Dropdown.")
    models = ['en_core_web_sm', 'en_core_web_md', 'en_core_web_lg', 'en_core_web_trf']
    # ``st.query_params`` yields plain strings, so the value is used whole;
    # indexing it with [0] would keep only its first character.
    requested_model = params["model"] if "model" in params else None
    if requested_model in models:
        default_ix = models.index(requested_model)
    else:
        # Missing or unknown model name: fall back instead of raising.
        default_ix = models.index('en_core_web_sm')
    model = st.sidebar.selectbox('Spacy Model', models, index=default_ix)

    st.sidebar.markdown("## Spatial Entity Labels")
    st.sidebar.markdown("**Mark** the Spatial Entities you want to extract?")
    # Full "type" string (e.g. "glr"), not just its first letter.
    tpes = params["type"] if "type" in params else ""
    gpe = st.sidebar.checkbox('GPE', value="g" in tpes)
    loc = st.sidebar.checkbox('LOC', value="l" in tpes)
    rse = st.sidebar.checkbox('RSE', value="r" in tpes)

    if gpe:
        gpe_selected = "GPE"
        types += "g"
    if loc:
        loc_selected = "LOC"
        types += "l"
    if rse:
        rse_selected = "RSE"
        types += "r"
def set_input():
    """Show the input text area and the "Extract" button.

    The text area is pre-filled from the ``text`` query parameter when it
    is present.

    Returns:
        The text-area content when "Extract" was clicked on this run,
        otherwise ``None``.
    """
    params = st.query_params
    # Default text: ``st.query_params`` values are plain strings, so take
    # the whole value (indexing with [0] would keep one character only).
    default_text = params["text"] if "text" in params else ""
    text = st.text_area("✍️ **Please input your text here:**", default_text)
    # Submit button.
    if st.button("Extract"):
        return text
    return None
def set_selected_entities(doc):
    """Restrict ``doc.ents`` to the currently selected entity labels.

    An entity is kept when its ``label_`` equals one of the module-level
    ``gpe_selected``, ``loc_selected`` or ``rse_selected`` values.
    ``doc`` is modified in place and also returned.
    """
    wanted_labels = (gpe_selected, loc_selected, rse_selected)
    doc.ents = [ent for ent in doc.ents if ent.label_ in wanted_labels]
    return doc
def extract_spatial_entities(text):
    """Extract spatial entities from ``text`` and render the results.

    Steps, in order:
      1. Load ``en_core_web_md``, add ``spatial_pipeline`` after ``ner``
         and run it on the whole text.
      2. Re-run the pipeline on every sentence, keep the selected labels
         and shift each entity to document-level token offsets.
      3. Build a fresh ``Doc`` with the same tokens, mark the sentence
         starts, attach the entities and copy each ``rse_id`` over.
      4. Save that ``Doc`` to ``saved_doc.spacy``, render it with displaCy
         (highlighting the labels recorded in ``types``) and show the
         entity table and the LLM composition table.
    """
    nlp = spacy.load("en_core_web_md")
    nlp.add_pipe("spatial_pipeline", after="ner")
    doc = nlp(text)

    # Sentence-by-sentence processing.
    sent_ents = []
    sent_texts = []
    sent_rse_id = []
    offset = 0
    sent_start_positions = [0]
    doc_copy = doc.copy()
    for sent in doc.sents:
        sent_doc = set_selected_entities(nlp(sent.text))
        sent_texts.append(sent_doc.text)
        sent_rse_id.extend(ent._.rse_id for ent in sent_doc.ents)
        # Shift sentence-local token indices to document-level ones.
        # Assumes re-tokenising the sentence yields the same token count
        # as in the full document -- TODO confirm.
        sent_ents.extend(
            Span(doc, ent.start + offset, ent.end + offset, label=ent.label_)
            for ent in sent_doc.ents
        )
        offset += len(sent)
        # ``offset`` now equals the first token index of the next sentence.
        sent_start_positions.append(offset)

    # Build a new Doc carrying the same tokens and spacing.
    final_doc = Doc(
        nlp.vocab,
        words=[token.text for token in doc],
        spaces=[token.whitespace_ for token in doc],
    )
    for start in sent_start_positions:
        # The last recorded position is one past the final token.
        if start < len(final_doc):
            final_doc[start].is_sent_start = True
    final_doc.set_ents(sent_ents)
    for position, rse_id in enumerate(sent_rse_id):
        final_doc.ents[position]._.rse_id = rse_id
    doc = final_doc
    doc.to_disk("saved_doc.spacy")

    # Highlight only the labels whose letter is present in ``types``.
    label_by_letter = (("g", "GPE"), ("l", "LOC"), ("r", "RSE"))
    highlight_ents = [label for letter, label in label_by_letter if letter in types]
    render_options = {"ents": highlight_ents, "colors": colors}
    markup = displacy.render(doc, style="ent", options=render_options)
    markup = markup.replace("\n", "")
    st.write(HTML_WRAPPER.format(markup), unsafe_allow_html=True)

    show_spatial_ent_table(doc, text)
    show_sentence_selector_table(doc_copy)
def show_sentence_selector_table(doc_copy):
    """Render the LLM-generated spatial composition and a link to use it.

    The text of ``doc_copy`` is sent to ``llm_coding.llmapi``; the result
    is serialised as JSON and shown in a one-row table next to a link to
    the ``Locate`` page carrying the text and the composition as
    URL-encoded query parameters.

    Args:
        doc_copy: Document whose ``text`` is passed to the LLM helper.
    """
    # Local import so this block does not depend on the file's import list.
    from html import escape

    text = doc_copy.text
    st.markdown("**______________________________________________________________________________________**")
    st.markdown("**LLM-generated Spatial Composition**")

    combo_obj = llm_coding.llmapi(text)
    combo_str = json.dumps(combo_obj)
    combo_encoded = urllib.parse.quote(combo_str)
    text_encoded = urllib.parse.quote(text)
    url = f"{BASE_URL}Locate?mode=geocombo&text={text_encoded}&combo={combo_encoded}"

    # The table is rendered with escape=False, so every cell is raw HTML:
    # escape the LLM output and make the action an actual link to ``url``.
    rows = [{
        'LLM Output': escape(combo_str),
        'Action': f'<a href="{escape(url)}" target="_self">Use this spatial composition</a>',
    }]
    df = pd.DataFrame(rows)

    custom_style = """
"""
    st.markdown(custom_style, unsafe_allow_html=True)
    st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
def show_spatial_ent_table(doc, text):
    """Render a table of the entities in ``doc`` with map / GeoJSON links.

    Nothing is rendered when ``doc`` has no entities. Each row links to
    the ``Locate`` page with the selected ``types``, the ``model``, the
    input text and the entity's ``rse_id`` as query parameters.

    Args:
        doc: Document whose ``ents`` are listed; every entity is expected
            to expose ``_.rse_id``.
        text: Input text forwarded to the ``Locate`` links.
    """
    # Local import so this block does not depend on the file's import list.
    from html import escape

    if not doc.ents:
        return

    st.markdown("**______________________________________________________________________________________**")
    st.markdown("**Spatial Entities List**")

    rows = []  # one dict per table row
    for number, ent in enumerate(doc.ents, start=1):
        # Percent-encode every value: raw text may contain spaces, '&' or
        # '#', which would otherwise truncate or corrupt the query string.
        query = urllib.parse.urlencode(
            {"type": types, "model": model, "text": text, "entity": ent._.rse_id},
            quote_via=urllib.parse.quote,
        )
        url_map = f"{BASE_URL}Locate?map=true&{query}"
        url_json = f"{BASE_URL}Locate?geojson=true&{query}"
        # Cells are raw HTML (escape=False below): escape the entity text
        # and turn the "View" cells into links to the URLs built above.
        rows.append({
            'Sr.': number,
            'entity': escape(ent.text),
            'label': escape(ent.label_),
            'Map': f'<a href="{escape(url_map)}" target="_self">View</a>',
            'GEOJson': f'<a href="{escape(url_json)}" target="_self">View</a>',
        })

    df = pd.DataFrame(rows, columns=['Sr.', 'entity', 'label', 'Map', 'GEOJson'])
    st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
def set_header():
    """Render the page header through two raw-HTML ``st.markdown`` calls."""
    first_markup = """
"""
    second_markup = """
"""
    # Both fragments are emitted as raw HTML, in this order.
    for markup in (first_markup, second_markup):
        st.markdown(markup, unsafe_allow_html=True)
def set_side_menu():
    """Render the sidebar: deployment method, model and label checkboxes.

    Initial widget values come from the ``model`` and ``type`` query
    parameters. The chosen model is stored in the global ``model``; the
    ticked labels are recorded in the global ``types`` as a string made of
    ``g`` (GPE), ``l`` (LOC) and ``r`` (RSE).

    NOTE(review): the ``*_selected`` globals are only ever assigned their
    label here and never cleared, so unticking a box does not remove that
    label from them -- confirm whether that is intended.
    """
    global gpe_selected, loc_selected, rse_selected, model, types
    types = ""
    params = st.query_params

    st.sidebar.markdown("## Deployment Method")
    st.sidebar.markdown("You can select the deployment method for the model.")
    deployment_options = ["API", "Local deployment"]
    use_local_model = st.sidebar.radio("Choose deployment method:", deployment_options, index=0) == "Local deployment"
    if use_local_model:
        # NOTE(review): the entered path is not used anywhere in this function.
        local_model_path = st.sidebar.text_input("Enter local model path:", "")

    st.sidebar.markdown("## LLM Model")
    st.sidebar.markdown("You can **select** different *LLM model* powered by API.")
    models = ['Llama-3-8B', 'Mistral-7B-0.3', 'Gemma-2-10B', 'GPT-4o', 'Gemini Pro', 'Deepseek-R1', 'en_core_web_sm', 'en_core_web_md', 'en_core_web_lg', 'en_core_web_trf']
    # ``st.query_params`` yields plain strings, so the value is used whole;
    # indexing it with [0] would keep only its first character.
    requested_model = params["model"] if "model" in params else None
    if requested_model in models:
        default_ix = models.index(requested_model)
    else:
        # Missing or unknown model name: fall back instead of raising.
        default_ix = models.index('GPT-4o')
    model = st.sidebar.selectbox('LLM Model', models, index=default_ix)

    st.sidebar.markdown("## Spatial Entity Labels")
    st.sidebar.markdown("Please **Mark** the Spatial Entities you want to extract.")
    # Full "type" string (e.g. "glr"), not just its first letter.
    tpes = params["type"] if "type" in params else ""
    gpe = st.sidebar.checkbox('GPE', value="g" in tpes)
    loc = st.sidebar.checkbox('LOC', value="l" in tpes)
    st.sidebar.markdown("### Relative Spatial Entity:")
    rse = st.sidebar.checkbox('RSE', value="r" in tpes)

    if gpe:
        gpe_selected = "GPE"
        types += "g"
    if loc:
        loc_selected = "LOC"
        types += "l"
    if rse:
        rse_selected = "RSE"
        types += "r"
def main():
    """Build the page and run the extraction when there is text to process.

    The text comes from ``set_input`` when "Extract" was clicked; otherwise
    ``st.session_state.text`` is used if that key exists. With neither,
    only the header, sidebar and input widgets are rendered.
    """
    set_header()
    set_side_menu()
    text = set_input()
    if text is None:
        # No click on this run: fall back to text kept in the session state.
        if "text" not in st.session_state:
            return
        text = st.session_state.text
    extract_spatial_entities(text)


if __name__ == "__main__":
    main()