Shunfeng Zheng committed on
Update 1_SpatialParse.py

1_SpatialParse.py (CHANGED, +7 -88)
@@ -6,7 +6,6 @@ from PIL import Image
 import base64
 import sys
 import pandas as pd
-# import en_core_web_md
 from spacy.tokens import Span, Doc, Token
 from utils import geoutil
 import urllib.parse
@@ -156,17 +155,6 @@ def set_input():
     text = st.text_area("Enter the text to extract {Spatial Entities}", params["text"][0])
     if(st.button("Extract")):
 
-        # return 'France has detected a highly pathogenic strain of bird flu in a pet shop near Paris, days after an identical outbreak in one of Corsica’s main cities.'
-
-        return 'Between Glebe and Pyrmont. Burwood.'
-        return 'I would like to know where is the area between Burwood and Glebe. Pyrmont.'
-        return '5 km east of Burwood. 3 km south of Glebe. Between Pyrmont and Glebe.'
-        # return 'Between Burwood and Pyrmont.'
-        # return 'Between Burwood and Glebe.'
-        # return 'Between Burwood and Darling Harbour.'
-        # return 'Between China and USA.'
-        # return 'The Burwood city.'
-        # text = "New York is north of Washington. Between Burwood and Pyrmont city."
         return text
 
 def set_selected_entities(doc):
@@ -177,61 +165,7 @@ def set_selected_entities(doc):
     return doc
 
 def extract_spatial_entities(text):
-
-
-    # # nlp = spacy.load("en_core_web_md")
-    # # nlp.add_pipe("spatial_pipeline", after="ner")
-    # # doc = nlp(text)
-    # # doc = set_selected_entities(doc)
-    # # html = displacy.render(doc, style="ent", options=options)
-    # # html = html.replace("\n", "")
-    # # st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
-    # # show_spatial_ent_table(doc, text)
-
-    # nlp = spacy.load("en_core_web_md") #####
-    # nlp.add_pipe("spatial_pipeline", after="ner")
-    # doc = nlp(text)
-
-    # # Sentence-by-sentence processing
-    # sent_ents = []
-    # sent_texts = []
-    # sent_rse_id = []
-    # offset = 0  # running token offset into the full doc
-    # sent_start_positions = [0]  # sentence start positions
-    # doc_copy = doc.copy()  # kept for the composition display
-    # for sent in doc.sents:
-
-    #     sent_doc = nlp(sent.text)  # process one sentence at a time
-    #     sent_doc = set_selected_entities(sent_doc)  # entity filtering happens here
-    #     sent_texts.append(sent_doc.text)
-
-    #     for ent in sent_doc.ents:
-    #         sent_rse_id.append(ent._.rse_id)
-    #     # **Shift each entity's indices so they match the full text**
-    #     for ent in sent_doc.ents:
-    #         new_ent = Span(doc, ent.start + offset, ent.end + offset, label=ent.label_)
-    #         sent_ents.append(new_ent)
-
-    #     offset += len(sent)  # advance the offset
-    #     sent_start_positions.append(sent_start_positions[-1] + len(sent))  # record sentence start points
-    # # **Build a new Doc**
-    # final_doc = Doc(nlp.vocab, words=[token.text for token in doc], spaces=[token.whitespace_ for token in doc])
-    # for i in sent_start_positions:  # mark sentence starts manually
-    #     if i < len(final_doc):
-    #         final_doc[i].is_sent_start = True
-    # # **Set the entities**
-    # final_doc.set_ents(sent_ents)
-
-    # for i in range(len(sent_rse_id)):
-    #     final_doc.ents[i]._.rse_id = sent_rse_id[i]
-    # print(doc.ents[0].sent, 'original')
-    # doc = final_doc
-    # print(doc.ents[0].sent, 'new')
-    # # End of sentence-by-sentence processing
-
-    # # doc = set_selected_entities(doc)
-    # # doc.to_disk("saved_doc.spacy")
-    # doc.to_disk("/tmp/saved_doc.spacy")
+
 
     Span.set_extension("rse_id", default="", force=True)
     api_result = call_backend(text)
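Note: the block deleted above implemented per-sentence NER, shifting each sentence-local entity span by a running token offset and rebuilding a single Doc. A minimal runnable sketch of that re-indexing technique, assuming a stock spaCy model (the app's custom "spatial_pipeline" component and rse_id extension are omitted, and spans are anchored on the rebuilt Doc, which set_ents requires):

import spacy
from spacy.tokens import Doc, Span

# Sketch only; assumes en_core_web_md is installed
# (python -m spacy download en_core_web_md).
nlp = spacy.load("en_core_web_md")

def reindex_entities_per_sentence(text: str) -> Doc:
    """Run NER on each sentence in isolation, then rebuild one Doc whose
    entity spans use full-document token coordinates."""
    doc = nlp(text)
    ent_positions = []  # (start, end, label) in full-doc token indices
    offset = 0          # token offset of the current sentence
    for sent in doc.sents:
        sent_doc = nlp(sent.text)  # re-run the pipeline on this sentence only
        for ent in sent_doc.ents:
            # Shift sentence-local indices by the sentence's token offset.
            # Caveat: this assumes the isolated sentence tokenizes exactly
            # as it does inside the full document.
            ent_positions.append((ent.start + offset, ent.end + offset, ent.label_))
        offset += len(sent)
    final_doc = Doc(nlp.vocab,
                    words=[t.text for t in doc],
                    spaces=[t.whitespace_ for t in doc])
    final_doc.set_ents([Span(final_doc, s, e, label=lbl)
                        for s, e, lbl in ent_positions])
    return final_doc

doc = reindex_entities_per_sentence("Between Glebe and Pyrmont. Burwood.")
print([(ent.text, ent.label_) for ent in doc.ents])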
@@ -240,11 +174,6 @@ def extract_spatial_entities(text):
     st.markdown(type(api_result))
     st.markdown(doc_element)
 
-    # doc_element = {'text': 'Between Burwood and Glebe.', 'ents': [{'start': 8, 'end': 15, 'label': 'GPE'}, {'start': 20, 'end': 25, 'label': 'GPE'}], 'tokens': [{'id': 0, 'start': 0, 'end': 7}, {'id': 1, 'start': 8, 'end': 15}, {'id': 2, 'start': 16, 'end': 19}, {'id': 3, 'start': 20, 'end': 25}, {'id': 4, 'start': 25, 'end': 26}], 'ents_ext': [{'start': 8, 'end': 15, 'label': 'GPE', 'rse_id': 'Burwood'}, {'start': 20, 'end': 25, 'label': 'GPE', 'rse_id': 'Glebe'}]}
-    # doc_element = {'text': 'I would like to know where is the area between Burwood and Glebe. Pyrmont.', 'ents': [{'start': 47, 'end': 54, 'label': 'GPE'}, {'start': 59, 'end': 64, 'label': 'GPE'}, {'start': 66, 'end': 73, 'label': 'GPE'}], 'sents': [{'start': 0, 'end': 65}, {'start': 66, 'end': 74}], 'tokens': [{'id': 0, 'start': 0, 'end': 1}, {'id': 1, 'start': 2, 'end': 7}, {'id': 2, 'start': 8, 'end': 12}, {'id': 3, 'start': 13, 'end': 15}, {'id': 4, 'start': 16, 'end': 20}, {'id': 5, 'start': 21, 'end': 26}, {'id': 6, 'start': 27, 'end': 29}, {'id': 7, 'start': 30, 'end': 33}, {'id': 8, 'start': 34, 'end': 38}, {'id': 9, 'start': 39, 'end': 46}, {'id': 10, 'start': 47, 'end': 54}, {'id': 11, 'start': 55, 'end': 58}, {'id': 12, 'start': 59, 'end': 64}, {'id': 13, 'start': 64, 'end': 65}, {'id': 14, 'start': 66, 'end': 73}, {'id': 15, 'start': 73, 'end': 74}]}
-    # doc_element =
-
-
 
     nlp = English()
     nlp.add_pipe("sentencizer")
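Note: the commented-out doc_element samples above match displaCy's "manual" entity format, i.e. a dict holding the raw 'text' plus character-offset 'ents' (the extra 'ents_ext' list carries each entity's rse_id). A minimal sketch of rendering such a payload directly, assuming the backend returns a dict of this shape:

import streamlit as st
from spacy import displacy

# Hypothetical payload mirroring the commented-out sample above;
# manual rendering only needs the 'text' and 'ents' keys.
doc_element = {
    'text': 'Between Burwood and Glebe.',
    'ents': [{'start': 8, 'end': 15, 'label': 'GPE'},
             {'start': 20, 'end': 25, 'label': 'GPE'}],
}

# manual=True makes displaCy render the dict as-is: no Doc, no model.
html = displacy.render(doc_element, style="ent", manual=True)
st.write(html.replace("\n", ""), unsafe_allow_html=True)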
@@ -276,14 +205,14 @@ def show_sentence_selector_table(doc_copy):
     st.markdown("**______________________________________________________________________________________**")
     st.markdown("**Sentence Selector for Geographic Composition**")
 
-
+
     sentences = list(doc_copy.sents)
 
-
+
     rows = []
     for idx, sent in enumerate(sentences):
         sentence_text = sent.text.strip()
-
+
         url = BASE_URL + "Tagger?mode=geocombo&text=" + urllib.parse.quote(sentence_text)
         new_row = {
             'Sr.': idx + 1,
@@ -292,7 +221,6 @@ def show_sentence_selector_table(doc_copy):
         }
         rows.append(new_row)
 
-    # Convert to a DataFrame and render as HTML
     df = pd.DataFrame(rows)
     st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
 
@@ -304,9 +232,8 @@ def show_spatial_ent_table(doc, text):
     st.markdown("**______________________________________________________________________________________**")
     st.markdown("**Spatial Entities List**")
 
-    # Initialize an empty DataFrame
     df = pd.DataFrame(columns=['Sr.', 'entity', 'label', 'Map', 'GEOJson'])
-    rows = []
+    rows = []
 
     for ent in doc.ents:
         url_map = BASE_URL + "Tagger?map=true&type=" + types + "&model=" + model + "&text=" + text + "&entity=" + ent._.rse_id
@@ -314,7 +241,6 @@ def show_spatial_ent_table(doc, text):
         print(ent._.rse_id, 'pppp')
         url_json = BASE_URL + "Tagger?geojson=true&type=" + types + "&model=" + model + "&text=" + text + "&entity=" + ent._.rse_id
 
-        # Build a new row
         new_row = {
             'Sr.': len(rows) + 1,
             'entity': ent.text,
@@ -323,19 +249,12 @@ def show_spatial_ent_table(doc, text):
             'GEOJson': f'<a target="_self" href="{url_json}">View</a>'
         }
 
-        rows.append(new_row)
+        rows.append(new_row)
 
-    # Convert all rows to a DataFrame
     df = pd.DataFrame(rows)
 
-    # Display the HTML table with Streamlit
     st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
 
-    # params = st.experimental_get_query_params()
-    # params = st.query_params
-    # ase, level_1, level_2, level_3 = geoutil.get_ent(params["entity"][0])
-    # print(geoutil.get_ent(params), 'ppppp')
-
 def set_header():  # tetis Geospacy LOGO
     LOGO_IMAGE = "title.jpg"
 
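Note: the geocombo link in show_sentence_selector_table percent-encodes its text with urllib.parse.quote, while url_map and url_json above concatenate text and ent._.rse_id into the query string unescaped. A sketch of building the same links with every parameter encoded; tagger_url is a hypothetical helper and BASE_URL here is a placeholder for the app's own constant:

import urllib.parse

BASE_URL = "https://example.invalid/"  # placeholder for the app's BASE_URL

def tagger_url(text, entity, types, model, kind="map"):
    """Build a Tagger link with every query parameter percent-encoded."""
    params = {kind: "true", "type": types, "model": model,
              "text": text, "entity": entity}
    return BASE_URL + "Tagger?" + urllib.parse.urlencode(params)

# Usage inside show_spatial_ent_table (types and model are the app's globals):
# url_map  = tagger_url(text, ent._.rse_id, types, model, kind="map")
# url_json = tagger_url(text, ent._.rse_id, types, model, kind="geojson")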
@@ -442,7 +361,7 @@ def set_side_menu():
 
 def main():
     global gpe_selected, loc_selected, rse_selected, model
-
+
     set_header()
     set_side_menu()
 