Shunfeng Zheng committed on
Commit
e6031dd
·
verified ·
1 Parent(s): 96eed5e

Update 1_SpatialParse.py

Browse files
Files changed (1) hide show
  1. 1_SpatialParse.py +84 -64
1_SpatialParse.py CHANGED
@@ -9,9 +9,28 @@ import pandas as pd
9
  # import en_core_web_md
10
  from spacy.tokens import Span, Doc, Token
11
  from utils import geoutil
12
-
13
  import urllib.parse
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE":"#a6e22d"}
17
  options = {"ents": ['GPE', 'LOC', "RSE"], "colors": colors}
@@ -147,73 +166,74 @@ def set_selected_entities(doc):
147
  return doc
148
 
149
  def extract_spatial_entities(text):
150
- # nlp = en_core_web_md.load()
151
-
152
- # nlp = spacy.load("en_core_web_md")
 
 
 
 
 
 
 
 
 
153
  # nlp.add_pipe("spatial_pipeline", after="ner")
154
  # doc = nlp(text)
155
- # doc = set_selected_entities(doc)
156
- # html = displacy.render(doc, style="ent", options=options)
157
- # html = html.replace("\n", "")
158
- # st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # show_spatial_ent_table(doc, text)
160
 
161
- nlp = spacy.load("en_core_web_md") #####
162
- nlp.add_pipe("spatial_pipeline", after="ner")
163
- doc = nlp(text)
164
-
165
- # 分句处理
166
- sent_ents = []
167
- sent_texts = []
168
- sent_rse_id = []
169
- offset = 0 # 记录当前 token 偏移量
170
- sent_start_positions = [0] # 记录句子信息
171
- doc_copy = doc.copy() # 用于展示方程组合
172
- for sent in doc.sents:
173
-
174
- sent_doc = nlp(sent.text) # 逐句处理
175
- sent_doc = set_selected_entities(sent_doc) # 这里处理实体
176
- sent_texts.append(sent_doc.text)
177
-
178
- for ent in sent_doc.ents:
179
- sent_rse_id.append(ent._.rse_id)
180
- # **调整每个实体的索引,使其匹配完整文本**
181
- for ent in sent_doc.ents:
182
- new_ent = Span(doc, ent.start + offset, ent.end + offset, label=ent.label_)
183
- sent_ents.append(new_ent)
184
-
185
- offset += len(sent) # 更新偏移量
186
- sent_start_positions.append(sent_start_positions[-1] + len(sent)) # 记录句子起点
187
- # **创建新 Doc**
188
- final_doc = Doc(nlp.vocab, words=[token.text for token in doc], spaces=[token.whitespace_ for token in doc])
189
- for i in sent_start_positions: # 手动标记句子起始点
190
- if i < len(final_doc):
191
- final_doc[i].is_sent_start = True
192
- # **设置实体**
193
- final_doc.set_ents(sent_ents)
194
-
195
- for i in range(len(sent_rse_id)):
196
- final_doc.ents[i]._.rse_id = sent_rse_id[i]
197
- print(doc.ents[0].sent, '原始')
198
- doc = final_doc
199
- print(doc.ents[0].sent, '新')
200
- # 分句处理完毕
201
-
202
- # doc = set_selected_entities(doc)
203
- # doc.to_disk("saved_doc.spacy")
204
- doc.to_disk("/tmp/saved_doc.spacy")
205
-
206
-
207
-
208
-
209
- html = displacy.render(doc,style="ent", options = options)
210
- html = html.replace("\n","")
211
- st.write(HTML_WRAPPER.format(html),unsafe_allow_html=True)
212
- show_spatial_ent_table(doc, text)
213
-
214
- st.markdown("123123")
215
-
216
- show_sentence_selector_table(doc_copy)
217
 
218
  def show_sentence_selector_table(doc_copy):
219
  st.markdown("**______________________________________________________________________________________**")
 
9
  # import en_core_web_md
10
  from spacy.tokens import Span, Doc, Token
11
  from utils import geoutil
 
12
  import urllib.parse
13
 
14
# Backend configuration for the remote inference Space.
# NOTE(review): this code uses `os` and `requests` — confirm both are imported
# near the top of the file (not visible in this chunk).
API_TOKEN = os.getenv("HF_API_TOKEN")
BACKEND_URL = "https://dsbb0707-dockerb2.hf.space/api/predict/"

def call_backend(input_text):
    """POST *input_text* to the HF Space backend and return a display string.

    Returns a "✅ ..." string with result and timestamp on HTTP 200,
    a "❌ ..." string for any other status code, and a "⚠️ ..." string
    when the request (or response parsing) raises. Never raises itself.
    """
    try:
        auth_headers = {"Authorization": f"Bearer {API_TOKEN}"}
        resp = requests.post(
            BACKEND_URL,
            headers=auth_headers,
            json={"data": [input_text]},
            timeout=10,
        )
        if resp.status_code != 200:
            return f"❌ Backend Error (HTTP {resp.status_code})"
        payload = resp.json()["data"][0]
        # Any missing key here falls through to the except branch below.
        return f"✅ {payload['result']}\n⏰ {payload['timestamp']}"
    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return f"⚠️ Connection Error: {str(e)}"
34
 
35
  colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE":"#a6e22d"}
36
  options = {"ents": ['GPE', 'LOC', "RSE"], "colors": colors}
 
166
  return doc
167
 
168
  def extract_spatial_entities(text):
169
+ # # nlp = en_core_web_md.load()
170
+
171
+ # # nlp = spacy.load("en_core_web_md")
172
+ # # nlp.add_pipe("spatial_pipeline", after="ner")
173
+ # # doc = nlp(text)
174
+ # # doc = set_selected_entities(doc)
175
+ # # html = displacy.render(doc, style="ent", options=options)
176
+ # # html = html.replace("\n", "")
177
+ # # st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
178
+ # # show_spatial_ent_table(doc, text)
179
+
180
+ # nlp = spacy.load("en_core_web_md") #####
181
  # nlp.add_pipe("spatial_pipeline", after="ner")
182
  # doc = nlp(text)
183
+
184
+ # # 分句处理
185
+ # sent_ents = []
186
+ # sent_texts = []
187
+ # sent_rse_id = []
188
+ # offset = 0 # 记录当前 token 偏移量
189
+ # sent_start_positions = [0] # 记录句子信息
190
+ # doc_copy = doc.copy() # 用于展示方程组合
191
+ # for sent in doc.sents:
192
+
193
+ # sent_doc = nlp(sent.text) # 逐句处理
194
+ # sent_doc = set_selected_entities(sent_doc) # 这里处理实体
195
+ # sent_texts.append(sent_doc.text)
196
+
197
+ # for ent in sent_doc.ents:
198
+ # sent_rse_id.append(ent._.rse_id)
199
+ # # **调整每个实体的索引,使其匹配完整文本**
200
+ # for ent in sent_doc.ents:
201
+ # new_ent = Span(doc, ent.start + offset, ent.end + offset, label=ent.label_)
202
+ # sent_ents.append(new_ent)
203
+
204
+ # offset += len(sent) # 更新偏移量
205
+ # sent_start_positions.append(sent_start_positions[-1] + len(sent)) # 记录句子起点
206
+ # # **创建新 Doc**
207
+ # final_doc = Doc(nlp.vocab, words=[token.text for token in doc], spaces=[token.whitespace_ for token in doc])
208
+ # for i in sent_start_positions: # 手动标记句子起始点
209
+ # if i < len(final_doc):
210
+ # final_doc[i].is_sent_start = True
211
+ # # **设置实体**
212
+ # final_doc.set_ents(sent_ents)
213
+
214
+ # for i in range(len(sent_rse_id)):
215
+ # final_doc.ents[i]._.rse_id = sent_rse_id[i]
216
+ # print(doc.ents[0].sent, '原始')
217
+ # doc = final_doc
218
+ # print(doc.ents[0].sent, '新')
219
+ # # 分句处理完毕
220
+
221
+ # # doc = set_selected_entities(doc)
222
+ # # doc.to_disk("saved_doc.spacy")
223
+ # doc.to_disk("/tmp/saved_doc.spacy")
224
+
225
+ api_result = call_backend(text)
226
+ st.markdown(api_result)
227
+ st.text_area(api_result)
228
+
229
+ # html = displacy.render(doc,style="ent", options = options)
230
+ # html = html.replace("\n","")
231
+ # st.write(HTML_WRAPPER.format(html),unsafe_allow_html=True)
232
  # show_spatial_ent_table(doc, text)
233
 
234
+ # st.markdown("123123")
235
+
236
+ # show_sentence_selector_table(doc_copy)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  def show_sentence_selector_table(doc_copy):
239
  st.markdown("**______________________________________________________________________________________**")