Shunfeng Zheng committed: Update 1_SpatialParse.py

1_SpatialParse.py  CHANGED  (+59 -30)
@@ -11,33 +11,36 @@ from spacy.tokens import Span, Doc, Token
 from utils import geoutil
 import urllib.parse
 import os
-
+import requests
+from spacy.tokens import Doc
+from spacy.lang.en import English
+
 
 import pydantic
-
+print("Pydantic version:", pydantic.__version__)
+
 
+API_TOKEN = os.getenv('API_TOKEN1')
 
-# API_TOKEN = 'hf_'
-# BACKEND_URL = "https://dsbb0707-dockerb2.hf.space/api/predict/"
-# def call_backend(input_text):
-#     try:
-#         headers = {
-#             "Authorization": f"Bearer {API_TOKEN}"
-#         }
-#         response = requests.post(
-#             BACKEND_URL,
-#             headers=headers,
-#             json={"data": [input_text]},
-#             timeout=10
-#         )
-#         if response.status_code == 200:
-#             result = response.json()["data"][0]
-#             return f"✅ {result['result']}\n⏰ {result['timestamp']}"
-#         return f"❌ Backend Error (HTTP {response.status_code})"
-#     except Exception as e:
-#         return f"⚠️ Connection Error: {str(e)}"
+BACKEND_URL = "https://dsbb0707-dockerb2.hf.space/api/predict/"
+def call_backend(input_text):
+    try:
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}"
+        }
+        response = requests.post(
+            BACKEND_URL,
+            headers=headers,
+            json={"data": [input_text]},
+            timeout=10
+        )
+        if response.status_code == 200:
+            return response.json()  # ✅ keep the original JSON object (dict)
+        return {"error": f"❌ Backend Error (HTTP {response.status_code})"}
+    except Exception as e:
+        return {"error": f"⚠️ Connection Error: {str(e)}"}
 
 
 colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE":"#a6e22d"}
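Note on the first hunk: the old commented-out call_backend returned a formatted string, while the new version returns the backend's raw JSON dict, or an {"error": ...} dict on failure, so parsing moves to the caller. A minimal sketch of how a caller is expected to consume that new return shape; the api_result literal is canned from the example payload shown further down in this diff, so the snippet runs without hitting the backend, and the variable names are illustrative rather than part of the commit.

# Sketch, not part of the commit: consuming the new call_backend() return value.
api_result = {
    "data": [{
        "text": "Between Burwood and Glebe.",
        "ents": [{"start": 8, "end": 15, "label": "GPE"},
                 {"start": 20, "end": 25, "label": "GPE"}],
    }]
}

if "error" in api_result:
    print(api_result["error"])            # connection failure or non-200 status
else:
    doc_element = api_result["data"][0]   # first Gradio-style output: the serialized Doc
    print(doc_element["text"], doc_element["ents"])

Returning the raw dict keeps response parsing on the caller's side, which is what lets extract_spatial_entities pull out data[0] and rebuild the spaCy Doc itself in the second hunk.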
@@ -230,18 +233,44 @@ def extract_spatial_entities(text):
     # # doc.to_disk("saved_doc.spacy")
     # doc.to_disk("/tmp/saved_doc.spacy")
 
-
-
-
+    Span.set_extension("rse_id", default="", force=True)
+    api_result = call_backend(text)
+    print(api_result, 'dadada')
+    doc_element = api_result["data"][0]
+    st.markdown(type(api_result))
+    st.markdown(doc_element)
+
+    # doc_element = {'text': 'Between Burwood and Glebe.', 'ents': [{'start': 8, 'end': 15, 'label': 'GPE'}, {'start': 20, 'end': 25, 'label': 'GPE'}], 'tokens': [{'id': 0, 'start': 0, 'end': 7}, {'id': 1, 'start': 8, 'end': 15}, {'id': 2, 'start': 16, 'end': 19}, {'id': 3, 'start': 20, 'end': 25}, {'id': 4, 'start': 25, 'end': 26}], 'ents_ext': [{'start': 8, 'end': 15, 'label': 'GPE', 'rse_id': 'Burwood'}, {'start': 20, 'end': 25, 'label': 'GPE', 'rse_id': 'Glebe'}]}
+    # doc_element = {'text': 'I would like to know where is the area between Burwood and Glebe. Pyrmont.', 'ents': [{'start': 47, 'end': 54, 'label': 'GPE'}, {'start': 59, 'end': 64, 'label': 'GPE'}, {'start': 66, 'end': 73, 'label': 'GPE'}], 'sents': [{'start': 0, 'end': 65}, {'start': 66, 'end': 74}], 'tokens': [{'id': 0, 'start': 0, 'end': 1}, {'id': 1, 'start': 2, 'end': 7}, {'id': 2, 'start': 8, 'end': 12}, {'id': 3, 'start': 13, 'end': 15}, {'id': 4, 'start': 16, 'end': 20}, {'id': 5, 'start': 21, 'end': 26}, {'id': 6, 'start': 27, 'end': 29}, {'id': 7, 'start': 30, 'end': 33}, {'id': 8, 'start': 34, 'end': 38}, {'id': 9, 'start': 39, 'end': 46}, {'id': 10, 'start': 47, 'end': 54}, {'id': 11, 'start': 55, 'end': 58}, {'id': 12, 'start': 59, 'end': 64}, {'id': 13, 'start': 64, 'end': 65}, {'id': 14, 'start': 66, 'end': 73}, {'id': 15, 'start': 73, 'end': 74}]}
+    # doc_element =
+
+
+
+    nlp = English()
+    nlp.add_pipe("sentencizer")
+    doc = Doc(nlp.vocab).from_json(doc_element)
+    doc = nlp.get_pipe("sentencizer")(doc)
+    st.markdown(type(doc))
+
+    for ent_ext in doc_element["ents_ext"]:
+        for ent in doc.ents:
+            if ent.start_char == ent_ext["start"] and ent.end_char == ent_ext["end"]:
+                ent._.rse_id = ent_ext["rse_id"]
+
+
+    doc = set_selected_entities(doc)
+    doc.to_disk("saved_doc.spacy")
+    doc.to_disk("/tmp/saved_doc.spacy")
+
 
-
-
-
-
+    html = displacy.render(doc,style="ent", options = options)
+    html = html.replace("\n","")
+    st.write(HTML_WRAPPER.format(html),unsafe_allow_html=True)
+    show_spatial_ent_table(doc, text)
 
-
+    st.markdown("123123")
 
-
+    show_sentence_selector_table(doc)
     pass
 def show_sentence_selector_table(doc_copy):
     st.markdown("**______________________________________________________________________________________**")
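Note on the second hunk: its core is rebuilding a spaCy Doc from the backend's JSON and re-attaching the custom rse_id values, which Doc.to_json()/Doc.from_json() do not carry on their own; that is why the payload ships a parallel "ents_ext" list that is copied back onto the matching Span objects by character offsets. A self-contained sketch of that round trip, using the example payload from the commented-out doc_element above; nothing beyond that payload is taken from the commit, and it requires spaCy >= 3.3 for Doc.from_json.

# Sketch, not part of the commit: JSON -> Doc -> custom rse_id extension round trip.
from spacy.lang.en import English
from spacy.tokens import Doc, Span

Span.set_extension("rse_id", default="", force=True)

doc_element = {
    "text": "Between Burwood and Glebe.",
    "ents": [{"start": 8, "end": 15, "label": "GPE"},
             {"start": 20, "end": 25, "label": "GPE"}],
    "tokens": [{"id": 0, "start": 0, "end": 7}, {"id": 1, "start": 8, "end": 15},
               {"id": 2, "start": 16, "end": 19}, {"id": 3, "start": 20, "end": 25},
               {"id": 4, "start": 25, "end": 26}],
    "ents_ext": [{"start": 8, "end": 15, "label": "GPE", "rse_id": "Burwood"},
                 {"start": 20, "end": 25, "label": "GPE", "rse_id": "Glebe"}],
}

nlp = English()
nlp.add_pipe("sentencizer")

# from_json() restores only standard attributes (text, tokens, ents), so the
# rse_id values travel in "ents_ext" and are copied back by character offsets.
doc = Doc(nlp.vocab).from_json(doc_element)
doc = nlp.get_pipe("sentencizer")(doc)   # payload has no "sents", so add boundaries here

for ent_ext in doc_element["ents_ext"]:
    for ent in doc.ents:
        if ent.start_char == ent_ext["start"] and ent.end_char == ent_ext["end"]:
            ent._.rse_id = ent_ext["rse_id"]

for ent in doc.ents:
    print(ent.text, ent.label_, ent._.rse_id)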
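Note on the rendering step at the end of the hunk: the reconstructed Doc is rendered with displacy and pushed into Streamlit as raw HTML. A small sketch of how those pieces fit together; HTML_WRAPPER and options here are stand-ins for the definitions that live elsewhere in 1_SpatialParse.py and are assumptions, not taken from this diff (only the colors dict comes from the first hunk's context).

# Sketch, not part of the commit: displacy entity markup inside Streamlit.
import streamlit as st
from spacy import displacy

colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE": "#a6e22d"}
options = {"ents": ["GPE", "LOC", "RSE"], "colors": colors}   # assumed options dict

# Assumed wrapper template; the real one is defined elsewhere in the file.
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef;
    border-radius: 0.25rem; padding: 1rem">{}</div>"""

def render_entities(doc):
    # displacy returns an HTML fragment; newlines are stripped because
    # Streamlit's markdown renderer can mangle multi-line raw HTML.
    html = displacy.render(doc, style="ent", options=options)
    html = html.replace("\n", "")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)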