Spaces:
Running
Running
Shunfeng Zheng
committed on
Upload 12 files
Browse files- geospacy.py +256 -0
- regex_spatial.py +72 -0
- utils/.DS_Store +0 -0
- utils/__pycache__/config.cpython-310.pyc +0 -0
- utils/__pycache__/geoutil.cpython-310.pyc +0 -0
- utils/__pycache__/geoutil.cpython-39.pyc +0 -0
- utils/__pycache__/llm_coding.cpython-310.pyc +0 -0
- utils/__pycache__/llm_ent_extract.cpython-310.pyc +0 -0
- utils/config.py +1 -0
- utils/geoutil.py +262 -0
- utils/llm_coding.py +582 -0
- utils/llm_ent_extract.py +145 -0
geospacy.py
ADDED
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from spacy.tokens import Span
|
2 |
+
from spacy.tokens import Doc
|
3 |
+
from spacy.tokens import Token
|
4 |
+
import regex_spatial
|
5 |
+
from spacy.language import Language
|
6 |
+
import re
|
7 |
+
from utils import llm_ent_extract
|
8 |
+
|
9 |
+
# Module-level accumulator for the identifier assembled by find_ent_by_regex
# and reset by get_relative_entity (note: the name shadows the builtin ``id``).
id = ""

# Name of the custom spaCy extension attribute registered by set_extension().
rse_id = "rse_id"
|
11 |
+
def set_extension():
    """Register the ``rse_id`` extension attribute on Span, Doc and Token.

    The attribute defaults to an empty string; ``force=True`` is passed so an
    existing registration is overwritten instead of raising.
    """
    for spacy_cls in (Span, Doc, Token):
        spacy_cls.set_extension(rse_id, default="", force=True)
|
15 |
+
|
16 |
+
def get_level1(doc, sentence, ent):
    """Run find_ent_by_regex with the level-1 pattern from ``regex_spatial``."""
    level1_pattern = regex_spatial.get_level1_regex()
    return find_ent_by_regex(doc, sentence, ent, level1_pattern)
|
18 |
+
|
19 |
+
def get_level2(doc, sentence, ent):
    """Run find_ent_by_regex with the level-2 pattern from ``regex_spatial``."""
    level2_pattern = regex_spatial.get_level2_regex()
    return find_ent_by_regex(doc, sentence, ent, level2_pattern)
|
21 |
+
|
22 |
+
def get_level3(doc, sentence, ent):
    """Run find_ent_by_regex with the level-3 pattern from ``regex_spatial``."""
    level3_pattern = regex_spatial.get_level3_regex()
    return find_ent_by_regex(doc, sentence, ent, level3_pattern)
|
24 |
+
|
25 |
+
|
26 |
+
def find_ent_by_regex(doc, sentence, ent, regex):
    """Stretch ``ent`` over the first usable ``regex`` match starting inside ``sentence``.

    The whole document text is scanned. A match is usable when its start
    offset lies within ``sentence`` and ``doc.char_span`` can turn it into a
    span. For the first usable match, the matched text is prepended to the
    module-level ``id`` (joined with "_"), then either ``ent.end_char`` is
    moved to the match end (match starts after the entity) or
    ``ent.start_char`` is moved to the match start.

    :param doc: spaCy Doc that is searched.
    :param sentence: span whose character range restricts where matches may start.
    :param ent: entity span; its character bounds are modified in place.
    :param regex: pattern passed to ``re.finditer``.
    :return: ``ent`` (the same object), modified or not.
    """
    # NOTE(review): the source indentation was lost; this assumes the function
    # returns right after the first usable match -- confirm against the original.
    global id

    # Seed the identifier with the entity text on first use.
    if id == "":
        id = ent.text

    for found in re.finditer(regex, doc.text):
        match_start, match_end = found.span()
        if match_start < sentence.start_char or match_start > sentence.end_char:
            continue

        matched_span = doc.char_span(match_start, match_end)
        if matched_span is None:
            continue

        id = matched_span.text + "_" + id
        if match_start > ent.end_char:
            ent.end_char = match_end
        else:
            ent.start_char = match_start
        return ent

    return ent
|
45 |
+
|
46 |
+
|
47 |
+
def update_entities(doc, entity_texts, replace=True):
    """Label entities in ``doc`` from their surface text and write them to ``doc.ents``.

    Only the first occurrence of each text in ``doc.text`` is labelled.

    :param doc: parsed spaCy Doc; its entities are modified in place.
    :param entity_texts: dict mapping the entity text to its entity label.
    :param replace: True replaces the existing entities; False keeps them and
        adds the new ones.
    """
    new_ents = [] if replace else list(doc.ents)

    for ent_text, ent_label in entity_texts.items():
        if not ent_text:
            continue  # an empty string cannot form an entity
        start_char = doc.text.find(ent_text)
        if start_char == -1:
            continue
        # Let spaCy map character offsets onto its own tokens. Counting
        # whitespace-separated words gives wrong token indices whenever the
        # tokenizer splits punctuation into separate tokens.
        new_ent = doc.char_span(
            start_char,
            start_char + len(ent_text),
            label=ent_label,
            alignment_mode="expand",
        )
        if new_ent is not None:
            new_ents.append(new_ent)

    doc.set_ents(new_ents)
|
68 |
+
|
69 |
+
|
70 |
+
def get_relative_entity(doc, sentence, ent):
    """Build the output span for ``ent`` after running the three regex levels.

    The module-level ``id`` is reset, then get_level1, get_level2 and
    get_level3 are applied in turn. If the resulting ``id`` contains "_", a
    new span labelled "RSE" is created from the final entity's character
    bounds; otherwise a span with ``ent``'s own bounds and label is created.
    Either way the span's ``_.rse_id`` is set to ``id``.
    """
    global id

    id = ""
    candidate = ent
    for level_fn in (get_level1, get_level2, get_level3):
        candidate = level_fn(doc, sentence, candidate)

    if "_" not in id:
        plain = doc.char_span(ent.start_char, ent.end_char, ent.label_)
        plain._.rse_id = id
        return plain

    relative = doc.char_span(candidate.start_char, candidate.end_char, "RSE")
    relative._.rse_id = id
    return relative
|
93 |
+
|
94 |
+
@Language.component("spatial_pipeline")
def get_spatial_ent(doc):
    """spaCy component that replaces ``doc.ents`` with spans from get_relative_entity.

    The document text is sent to ``llm_ent_extract.extract_GPE``; its output is
    passed through ``llm_ent_extract.extract(..., 'GPE')`` and the result is
    handed to update_entities, replacing the existing entities. Each resulting
    entity is then processed by get_relative_entity together with a token
    window, and the collected spans become the new ``doc.ents``.
    """
    set_extension()
    new_ents = []

    # LLM-based entity extraction (intermediate values are printed for debugging).
    raw_gpe = llm_ent_extract.extract_GPE(doc.text)
    print(doc.text, 'llmin')
    print(raw_gpe, 'llout')

    parsed_gpe = llm_ent_extract.extract(raw_gpe, 'GPE')
    print(parsed_gpe, 'llmout2')
    update_entities(doc, parsed_gpe, True)
    ents = doc.ents
    print(ents, 'eee')

    # ``window_end`` carries over between entities: the next window starts
    # where the previous one ended.
    window_end = None
    for ent in ents:
        if ent.end == len(doc):
            # Entity is the last thing in the document.
            window_start = ent.sent.start
            window_end = ent.end
        else:
            following = doc[ent.end]
            window_start = ent.sent.start if window_end is None else window_end
            if following.text.lower() in regex_spatial.get_keywords():
                window_end = following.i
            else:
                window_end = ent.end

        window = Span(doc, window_start, window_end)
        new_ents.append(get_relative_entity(doc, window, ent))

    doc.ents = new_ents
    return doc
|
160 |
+
|
161 |
+
# def update_doc_ents(doc, new_dict):
|
162 |
+
# """
|
163 |
+
# 更新 doc.ents, 将新的实体文本和标签添加到 doc 中。
|
164 |
+
#
|
165 |
+
# 参数:
|
166 |
+
# - doc: spaCy 的 Doc 对象
|
167 |
+
# - new_dict: 一个字典,键是实体文本,值是标签
|
168 |
+
# """
|
169 |
+
# modified_ents = []
|
170 |
+
#
|
171 |
+
# # 遍历字典中的实体文本和标签
|
172 |
+
# for ent_text, label in new_dict.items():
|
173 |
+
# # 将实体文本拆分成单词
|
174 |
+
# ent_words = ent_text.split()
|
175 |
+
#
|
176 |
+
# # 遍历 doc 中的 token 来查找第一个单词
|
177 |
+
# start = None
|
178 |
+
# for i in range(len(doc)):
|
179 |
+
# # 如果当前 token 和实体的第一个单词匹配,确定 start
|
180 |
+
# if doc[i].text == ent_words[0]:
|
181 |
+
# start = i
|
182 |
+
# # 然后检查后续的单词是否都匹配
|
183 |
+
# end = start + len(ent_words) # 计算 end 为 start + 单词数
|
184 |
+
# if all(doc[start + j].text == ent_words[j] for j in range(len(ent_words))):
|
185 |
+
# # 创建 Span 对象
|
186 |
+
# new_ent = Span(doc, start, end, label=label)
|
187 |
+
# modified_ents.append(new_ent)
|
188 |
+
# break # 找到匹配后跳出循环
|
189 |
+
#
|
190 |
+
# # 使用 doc.set_ents() 更新 doc.ents
|
191 |
+
# doc.set_ents(modified_ents)
|
192 |
+
#
|
193 |
+
#
|
194 |
+
# # def llm_extract(doc, model):
|
195 |
+
#
|
196 |
+
# def split_doc_into_sentences(doc):
|
197 |
+
# """
|
198 |
+
# 将 doc 的文本按句子分割,并返回每个句子的字符串列表。
|
199 |
+
# """
|
200 |
+
# sentence_list = [sent.text.strip() for sent in doc.sents]
|
201 |
+
# return sentence_list
|
202 |
+
#
|
203 |
+
#
|
204 |
+
# @Language.component("spatial_pipeline")
|
205 |
+
# def get_spatial_ent(doc):
|
206 |
+
#
|
207 |
+
# set_extension()
|
208 |
+
#
|
209 |
+
# split_sent = split_doc_into_sentences(doc)
|
210 |
+
# for i in range(len(split_sent)):
|
211 |
+
# gpe_dict = llm_ent_extract.extract_GPE(split_sent[i])
|
212 |
+
# loc_dict = llm_ent_extract.extract_LOC(split_sent[i])
|
213 |
+
# new_dict = gpe_dict|loc_dict
|
214 |
+
#
|
215 |
+
#
|
216 |
+
# print(gpe_dict, '111')
|
217 |
+
# print(loc_dict)
|
218 |
+
# print(new_dict)
|
219 |
+
# # new_dict = {'pp': 'ORG', 'France': 'GPE', 'Paris': 'GPE'}
|
220 |
+
#
|
221 |
+
#
|
222 |
+
# # 调用新的函数更新 doc 的实体
|
223 |
+
# update_doc_ents(doc, new_dict)
|
224 |
+
#
|
225 |
+
# # 继续处理 doc.ents
|
226 |
+
# ents = [ent for ent in doc.ents if ent.label_ == "GPE" or ent.label_ == "LOC"]
|
227 |
+
# print(ents[1].label_)
|
228 |
+
#
|
229 |
+
# end = None
|
230 |
+
# new_ents = []
|
231 |
+
#
|
232 |
+
# for ent in ents:
|
233 |
+
# if ent.end != len(doc):
|
234 |
+
# next_token = doc[ent.end + 1]
|
235 |
+
# if end is not None:
|
236 |
+
# start = end
|
237 |
+
# else:
|
238 |
+
# start = ent.sent.start
|
239 |
+
# if next_token.text.lower() in regex_spatial.get_keywords():
|
240 |
+
# end = next_token.i
|
241 |
+
# else:
|
242 |
+
# end = ent.end
|
243 |
+
# else:
|
244 |
+
# start = ent.sent.start
|
245 |
+
# end = ent.end
|
246 |
+
#
|
247 |
+
# # 调用 get_relative_entity 来获得新的实体信息
|
248 |
+
# rsi_ent = get_relative_entity(doc, Span(doc, start, end), ent)
|
249 |
+
#
|
250 |
+
# # 将处理后的实体添加到新的实体列表中
|
251 |
+
# new_ents.append(rsi_ent)
|
252 |
+
#
|
253 |
+
# doc.ents = new_ents # 更新 doc.ents
|
254 |
+
# print(new_ents, '111222')
|
255 |
+
#
|
256 |
+
# return doc
|
regex_spatial.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
Created on Tue Jul 26 14:57:18 2022
|
5 |
+
|
6 |
+
@author: syed
|
7 |
+
"""
|
8 |
+
|
9 |
+
from quantities import units as u
|
10 |
+
from quantities import Quantity
|
11 |
+
|
12 |
+
# Regex quantifier suffixes used when the level-3 pattern is assembled.
one_plus = "+"   # one or more repetitions
zero_plus = "*"  # zero or more repetitions
|
14 |
+
|
15 |
+
|
16 |
+
def get_quantities_regex():
    """Return the length-unit names of ``quantities`` joined by "|" inside "[...]".

    Only attributes of ``u.length`` that are ``Quantity`` instances and whose
    name contains no underscore are included.

    NOTE(review): the surrounding square brackets make the regex engine read
    this as a character class (any single listed character, "|" included),
    not as an alternation of unit names -- confirm this is intended.
    """
    unit_names = [
        name
        for name in dir(u.length)
        if isinstance(getattr(u.length, name), Quantity) and "_" not in name
    ]
    return "[" + "|".join(unit_names) + "]"
|
24 |
+
def get_number_regex():
    """Return the pattern for a single decimal digit."""
    return "[0-9]"
|
27 |
+
def get_space_regex():
    """Return the pattern for a single whitespace character.

    A raw string is used because ``\\s`` is not a valid escape in a normal
    string literal and triggers an invalid-escape warning on recent Pythons.
    The returned value is unchanged: a backslash followed by ``s``.
    """
    return r"\s"
|
30 |
+
|
31 |
+
def get_directional_regex():
    """Return the "|"-separated direction keywords.

    Compound directions are listed first, then the apostrophe abbreviations,
    then the four plain cardinal words, so that in an alternation a compound
    such as "north-east" is tried before "north".
    """
    compound = "north-east|north-west|south-east|south-west|north east|north west|south east|south west|northeast|northwest|southeast|southwest"
    abbreviations = "N'|S'|E'|W'|NE'|NW'|SE'|SW'"
    cardinal = "north|south|east|west"
    return "|".join((compound, abbreviations, cardinal))
|
36 |
+
|
37 |
+
def get_center_regex():
    """Return the "|"-separated keywords that denote a central area."""
    return "|".join(("center", "central", "downtown", "midtown"))
|
40 |
+
|
41 |
+
def get_near_regex():
    """Return the "|"-separated keywords that denote nearness.

    "nearby" precedes "near" so the longer word is tried first in an alternation.
    """
    words = (
        "nearby", "near", "vicinity", "close", "beside",
        "next", "adjacent", "immediate", "border",
    )
    return "|".join(words)
|
44 |
+
|
45 |
+
def get_surrounding_regex():
    """Return the "|"-separated keywords that denote a surrounding area.

    The correctly spelled "neighbourhood" and "neighborhood" are included; the
    earlier misspelling "neigbourhood" is kept so inputs that matched before
    still match.
    """
    surrounding_kwds = "surrounding|neighbourhood|neighborhood|neigbourhood|proximity|territory|locality"
    return surrounding_kwds
|
48 |
+
def get_level1_regex():
    """Return the case-insensitive level-1 pattern: one group of direction or centre keywords."""
    alternatives = "|".join((get_directional_regex(), get_center_regex()))
    return "(?i)(" + alternatives + ")"
|
51 |
+
|
52 |
+
def get_level2_regex():
    """Return the case-insensitive level-2 pattern: one group of nearness or surrounding keywords."""
    alternatives = "|".join((get_near_regex(), get_surrounding_regex()))
    return "(?i)(" + alternatives + ")"
|
55 |
+
|
56 |
+
def get_level3_regex():
    """Return the case-insensitive level-3 pattern.

    The group is: number pattern one or more times, whitespace pattern zero or
    more times, then the quantities pattern one or more times.
    """
    pieces = [
        "(?i)(",
        get_number_regex(), one_plus,
        get_space_regex(), zero_plus,
        get_quantities_regex(), one_plus,
        ")",
    ]
    return "".join(pieces)
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
def get_keywords():
    """Return the flat keyword list used to test the token after an entity.

    It contains the direction, nearness, surrounding and centre keywords (in
    that order), followed by ",", "and" and ".".
    """
    keywords = get_directional_regex().split("|")
    for pattern in (get_near_regex(), get_surrounding_regex(), get_center_regex()):
        keywords.extend(pattern.split("|"))
    keywords += [",", "and", "."]
    return keywords
|
utils/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
utils/__pycache__/config.cpython-310.pyc
ADDED
Binary file (329 Bytes). View file
|
|
utils/__pycache__/geoutil.cpython-310.pyc
ADDED
Binary file (6.4 kB). View file
|
|
utils/__pycache__/geoutil.cpython-39.pyc
ADDED
Binary file (5.95 kB). View file
|
|
utils/__pycache__/llm_coding.cpython-310.pyc
ADDED
Binary file (12 kB). View file
|
|
utils/__pycache__/llm_ent_extract.cpython-310.pyc
ADDED
Binary file (6.78 kB). View file
|
|
utils/config.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
import os

# The OpenAI API key is read from the OPENAI_API_KEY environment variable and
# must never be committed to source control. The key that was previously
# hard-coded here has been published and should be revoked.
# An empty string is used when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "")
|
utils/geoutil.py
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
Created on Tue Aug 2 12:45:21 2022
|
5 |
+
|
6 |
+
@author: syed
|
7 |
+
"""
|
8 |
+
|
9 |
+
import math
|
10 |
+
import re
|
11 |
+
import regex_spatial
|
12 |
+
import quantities as pq
|
13 |
+
from math import radians, cos, sin, asin, sqrt
|
14 |
+
import quantities as pq
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
def get_kilometers(d, unit):
    """Convert the distance ``d`` given in ``unit`` to kilometres.

    :param d: numeric value (anything accepted by ``float``).
    :param unit: unit string passed to ``pq.CompoundUnit``.
    :return: the magnitude of the quantity after its units are set to km.
    """
    distance = float(d) * pq.CompoundUnit(unit)
    distance.units = pq.km  # assigning units rescales the quantity
    return distance.magnitude
|
22 |
+
|
23 |
+
def ConvertToRadian(input):
    """Convert an angle from degrees to radians."""
    scaled = input * math.pi
    return scaled / 180
|
25 |
+
|
26 |
+
def get_level1(ent):
    """Return the first level-1 regex match found in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level1_regex(), ent)
    return found.group() if found is not None else None
|
31 |
+
def get_level2(ent):
    """Return the first level-2 regex match found in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level2_regex(), ent)
    return found.group() if found is not None else None
|
36 |
+
def get_level3(ent):
    """Return the first level-3 regex match found in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level3_regex(), ent)
    return found.group() if found is not None else None
|
41 |
+
|
42 |
+
def get_ase(ent):
    """Return the part of ``ent`` after its last underscore (all of it if there is none)."""
    return ent.split("_")[-1]
|
45 |
+
|
46 |
+
def get_ent(ent):
    """Split an identifier string into ``(ase, level1, level2, level3)``.

    Each level entry is the matching text from get_level1/2/3, or None.
    """
    ase = get_ase(ent)
    level1 = get_level1(ent)
    level2 = get_level2(ent)
    level3 = get_level3(ent)
    return ase, level1, level2, level3
|
48 |
+
|
49 |
+
def get_centroid(coordinates, centroid, mini, maxi):
    """Return the midpoint between ``centroid`` and the coordinate closest to the mean angle.

    The third element of each coordinate is compared with the average of
    ``mini`` and ``maxi``; the first coordinate with the smallest absolute
    difference is selected and passed to ``midpoint`` with that average.
    """
    average = (mini + maxi) / 2
    deviations = [abs(p[2] - average) for p in coordinates]
    nearest = coordinates[deviations.index(min(deviations))]
    return midpoint(centroid[0], centroid[1], nearest[0], nearest[1], average)
|
59 |
+
|
60 |
+
|
61 |
+
def calculateArea(coordinates):
    """Compute an area figure for a ring of coordinates.

    Consecutive coordinate pairs are accumulated with a spherical-polygon
    style sum, scaled by 6378137**2 / 1000000, rounded to 2 decimals, made
    non-negative and increased by 2.

    NOTE(review): the second sine term uses ``p2[0]`` while the first uses
    ``p1[1]`` -- this looks like it may have been meant to be ``p2[1]``; confirm.
    NOTE(review): the source indentation was lost; this assumes the final
    ``abs(round(...)) + 2`` step also runs for fewer than three coordinates
    -- confirm against the original.
    """
    area = 0
    if len(coordinates) > 2:
        for p1, p2 in zip(coordinates[:-1], coordinates[1:]):
            delta = math.radians(p2[0] - p1[0])
            area += delta * (2 + math.sin(p1[1] * math.pi / 180) + math.sin(math.radians(p2[0])))

        area = area * 6378137 * 6378137 / 1000000

    return abs(round(area, 2)) + 2
|
76 |
+
|
77 |
+
def get_midmid_point(centroid, point1, point2, is_midmid):
    """Return points between ``centroid`` and each of ``point1`` / ``point2``.

    ``mid`` is the midpoint of centroid and the point; ``midmid`` is the
    midpoint of centroid and ``mid``. All four are always computed.

    :return: ``(midmid1, midmid2)`` when ``is_midmid`` is truthy, else ``(mid1, mid2)``.
    """
    cx, cy = centroid[0], centroid[1]

    mid1 = midpoint(cx, cy, point1[0], point1[1], point1[2])
    mid2 = midpoint(cx, cy, point2[0], point2[1], point2[2])
    midmid1 = midpoint(cx, cy, mid1[0], mid1[1], mid1[2])
    midmid2 = midpoint(cx, cy, mid2[0], mid2[1], mid2[2])

    return (midmid1, midmid2) if is_midmid else (mid1, mid2)
|
94 |
+
|
95 |
+
def getPointByDistanceAngle(lat, ln, angle, distanceInKm):
    """Return the point reached from (lat, ln) along bearing ``angle`` after ``distanceInKm``.

    Inputs and outputs are in degrees; an Earth radius of 6378.1 km is used.
    NOTE: a later definition in this file with the same name takes
    ``(lat, ln, angle, distance, unit)`` and replaces this one at import time.

    :return: ``[lon, lat, angle]``.
    """
    earth_radius_km = 6378.1
    bearing = angle * math.pi / 180
    angular = distanceInKm / earth_radius_km  # distance as an angle in radians

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1) * math.cos(angular) +
                     math.cos(lat1) * math.sin(angular) * math.cos(bearing))
    lon2 = lon1 + math.atan2(math.sin(bearing) * math.sin(angular) * math.cos(lat1),
                             math.cos(angular) - math.sin(lat1) * math.sin(lat2))

    return [math.degrees(lon2), math.degrees(lat2), angle]
|
117 |
+
|
118 |
+
def midpoint(x1, y1, x2, y2, angle):
    """Return the spherical midpoint of two points as ``[latitude, longitude, angle]``.

    ``x1``/``x2`` are treated as latitudes and ``y1``/``y2`` as longitudes, all
    in degrees. Results are rounded to 8 decimals; ``angle`` is passed through.
    NOTE: a later ``midpoint`` definition in this file replaces this one at
    import time.
    """
    lat_a, lon_a = math.radians(x1), math.radians(y1)
    lat_b, lon_b = math.radians(x2), math.radians(y2)
    d_lon = lon_b - lon_a

    bx = math.cos(lat_b) * math.cos(d_lon)
    by = math.cos(lat_b) * math.sin(d_lon)
    cx = math.cos(lat_a) + bx

    lat_c = math.atan2(math.sin(lat_a) + math.sin(lat_b),
                       math.sqrt(cx * cx + by * by))
    lon_c = lon_a + math.atan2(by, cx)
    # Wrap the longitude back into the -pi..pi range.
    lon_c = (lon_c + 3 * math.pi) % (2 * math.pi) - math.pi

    return [round(math.degrees(lat_c), 8), round(math.degrees(lon_c), 8), angle]
|
137 |
+
|
138 |
+
def midpoint(y1, x1, y2, x2, angle):
    """Return the spherical midpoint of two points as ``[longitude, latitude, angle]``.

    ``y1``/``y2`` are treated as longitudes and ``x1``/``x2`` as latitudes, all
    in degrees. Results are rounded to 8 decimals; ``angle`` is passed through.
    The result is also printed with the tag 'midmid'.
    """
    lon_a, lat_a = math.radians(y1), math.radians(x1)
    lon_b, lat_b = math.radians(y2), math.radians(x2)
    d_lon = lon_b - lon_a

    bx = math.cos(lat_b) * math.cos(d_lon)
    by = math.cos(lat_b) * math.sin(d_lon)
    cx = math.cos(lat_a) + bx

    lat_c = math.atan2(math.sin(lat_a) + math.sin(lat_b),
                       math.sqrt(cx * cx + by * by))
    lon_c = lon_a + math.atan2(by, cx)
    # Wrap the longitude back into the -pi..pi range.
    lon_c = (lon_c + 3 * math.pi) % (2 * math.pi) - math.pi

    latitude = round(math.degrees(lat_c), 8)
    longitude = round(math.degrees(lon_c), 8)
    print([longitude, latitude, angle], 'midmid')
    return [longitude, latitude, angle]
|
160 |
+
|
161 |
+
def calculate_bearing(pointA, pointB):
    """Return the initial compass bearing, in degrees, from ``pointA`` to ``pointB``.

    Element 0 of each point is used as the latitude and element 1 as the
    longitude, both in degrees.
    NOTE(review): get_coordinates in llm_coding appears to pass (lon, lat)
    tuples -- confirm which order callers are expected to use.

    :param pointA: tuple for the start point.
    :param pointB: tuple for the end point.
    :return: bearing in the range [0, 360), or the sentinel 400 when either
        argument is not a tuple or the first element of either point is not a
        float.
    """
    if not (isinstance(pointA, tuple) and isinstance(pointB, tuple)):
        return 400
    # Validate both points; the earlier check tested pointB[0] twice and never
    # looked at pointA.
    if not (isinstance(pointA[0], float) and isinstance(pointB[0], float)):
        return 400

    lat1 = math.radians(pointA[0])
    lat2 = math.radians(pointB[0])
    diffLong = math.radians(pointB[1] - pointA[1])

    x = math.sin(diffLong) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1)
                                           * math.cos(lat2) * math.cos(diffLong))

    # atan2 yields -180..180 degrees; normalise to a compass bearing.
    initial_bearing = math.degrees(math.atan2(x, y))
    return (initial_bearing + 360) % 360
|
184 |
+
|
185 |
+
def getPointByDistanceAngle(lat, ln, angle, distance, unit):
    """Return the point reached from (lat, ln) along bearing ``angle`` after ``distance`` in ``unit``.

    The distance is converted with get_kilometers; an Earth radius of
    6378.1 km is used. Inputs and outputs are in degrees.

    :return: ``(lon, lat, angle)`` with lon/lat rounded to 8 decimals.
    """
    earth_radius_km = 6378.1
    bearing = float(angle) * math.pi / 180
    angular = get_kilometers(distance, unit) / earth_radius_km  # distance as an angle

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1) * math.cos(angular) +
                     math.cos(lat1) * math.sin(angular) * math.cos(bearing))
    lon2 = lon1 + math.atan2(math.sin(bearing) * math.sin(angular) * math.cos(lat1),
                             math.cos(angular) - math.sin(lat1) * math.sin(lat2))

    return (round(math.degrees(lon2), 8), round(math.degrees(lat2), 8), angle)
|
206 |
+
|
207 |
+
|
208 |
+
def calculatePointByDistance(lat, ln, angle, distance, unit):
    """Offset (lat, ln) by a scaled distance along ``angle`` using a flat approximation.

    The distance is converted with get_kilometers and multiplied by the fixed
    factor 100 / (6378 * 1.56).
    NOTE(review): ``angle`` is passed to cos/sin as-is (the degree-to-radian
    conversion is commented out in the original) -- confirm callers pass radians.

    :return: ``(ln_new, lat_new, angle)`` with the coordinates rounded to 8 decimals.
    """
    scale = 100 / (6378 * 1.56)
    step = get_kilometers(distance, unit) * scale

    lat_new = lat + (step * math.cos(angle))
    ln_new = ln + (step * math.sin(angle))

    return (round(ln_new, 8), round(lat_new, 8), angle)
|
220 |
+
|
221 |
+
|
222 |
+
|
223 |
+
def pointByAngle(lat, ln, angle, distance, unit):
    """Return the point reached from (lat, ln) along bearing ``angle`` after ``distance`` in ``unit``.

    Same calculation as getPointByDistanceAngle with a unit, but ``angle`` is
    not coerced to float and the result is not rounded.

    :return: ``(lon, lat, angle)`` in degrees.
    """
    earth_radius_km = 6378.1
    bearing = angle * math.pi / 180
    angular = get_kilometers(distance, unit) / earth_radius_km  # distance as an angle

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1) * math.cos(angular) +
                     math.cos(lat1) * math.sin(angular) * math.cos(bearing))
    lon2 = lon1 + math.atan2(math.sin(bearing) * math.sin(angular) * math.cos(lat1),
                             math.cos(angular) - math.sin(lat1) * math.sin(lat2))

    return (math.degrees(lon2), math.degrees(lat2), angle)
|
245 |
+
|
246 |
+
|
247 |
+
def getPointByDistance(lat, ln, angle, distance, unit):
    """Shift (lat, ln) by ``distance`` using a degrees-per-kilometre approximation.

    The kilometre distance is divided by 111.32 to get a degree offset, which
    is added to the latitude and, divided by cos(lat * 0.01745), to the
    longitude (0.01745 approximates pi/180). ``angle`` is only passed through.

    :return: ``(new_lat, new_long, angle)`` with the coordinates rounded to 8 decimals.
    """
    degree_offset = get_kilometers(distance, unit) / 111.32
    shifted_lat = lat + degree_offset
    shifted_long = ln + degree_offset / math.cos(lat * 0.01745)
    return (round(shifted_lat, 8), round(shifted_long, 8), angle)
|
253 |
+
|
254 |
+
def haversine(lon1, lat1, lon2, lat2):
    """Return the haversine distance in kilometres between two points.

    All four arguments are in degrees; an Earth radius of 6371 km is used.
    """
    lon1, lat1, lon2, lat2 = (radians(value) for value in (lon1, lat1, lon2, lat2))
    half_dlon = (lon2 - lon1) / 2
    half_dlat = (lat2 - lat1) / 2
    a = sin(half_dlat) ** 2 + cos(lat1) * cos(lat2) * sin(half_dlon) ** 2
    central_angle = 2 * asin(sqrt(a))
    return 6371 * central_angle
|
262 |
+
|
utils/llm_coding.py
ADDED
@@ -0,0 +1,582 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import urllib3
|
3 |
+
import json
|
4 |
+
from utils import geoutil
|
5 |
+
import regex_spatial
|
6 |
+
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping
|
7 |
+
import re
|
8 |
+
import geopandas as gpd
|
9 |
+
from geocoder import geo_level1
|
10 |
+
from openai import OpenAI
|
11 |
+
from utils.config import api_key
|
12 |
+
|
13 |
+
# OpenAI client built from the key exposed by utils.config.
client = OpenAI(api_key=api_key)

# Name of the chat model used by this module.
model = "gpt-4o"

# Accepted spellings for each direction keyword.
north = ["north", "N'", "North", "NORTH"]
south = ["south", "S'", "South", "SOUTH"]
east = ["east", "E'", "East", "EAST"]
west = ["west", "W'", "West", "WEST"]
northeast = [
    "north-east", "NE'", "north east",
    "NORTH-EAST", "North East", "NORTH EAST",
]
southeast = [
    "south-east", "SE'", "south east",
    "SOUTH-EAST", "South East", "SOUTH EAST",
]
northwest = [
    "north-west", "NW'", "north west",
    "NORTH-WEST", "North West", "NORTH WEST",
]
southwest = [
    "south-west", "SW'", "south west",
    "SOUTH-WEST", "South West", "SOUTH WEST",
]
center = ["center", "central", "downtown", "midtown"]
|
29 |
+
#
|
30 |
+
#
|
31 |
+
# def get_directional_coordinates(coordinates, direction, centroid, minimum, maximum, is_midmid):
|
32 |
+
# direction_coordinates = get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum)
|
33 |
+
# midmid1, midmid2 = geoutil.get_midmid_point(centroid, direction_coordinates[0], direction_coordinates[-1],
|
34 |
+
# is_midmid)
|
35 |
+
# if direction in west:
|
36 |
+
# maxi = max(p[2] for p in direction_coordinates)
|
37 |
+
# mini = min(p[2] for p in direction_coordinates)
|
38 |
+
# index_mini = 0
|
39 |
+
# index_maxi = 0
|
40 |
+
# for idx, p in enumerate(direction_coordinates):
|
41 |
+
# if p[2] == mini:
|
42 |
+
# index_mini = idx
|
43 |
+
# if p[2] == maxi:
|
44 |
+
# index_maxi = idx
|
45 |
+
#
|
46 |
+
# direction_coordinates.insert(index_maxi + 1, midmid2)
|
47 |
+
# direction_coordinates.insert(index_mini + 1, midmid1)
|
48 |
+
# else:
|
49 |
+
# direction_coordinates.append(midmid2)
|
50 |
+
# direction_coordinates.append(midmid1)
|
51 |
+
#
|
52 |
+
# return direction_coordinates, midmid1, midmid2
|
53 |
+
#
|
54 |
+
#
|
55 |
+
# def get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum):
|
56 |
+
# direction_coordinates = []
|
57 |
+
# for p in coordinates:
|
58 |
+
# if direction in east:
|
59 |
+
# if p[2] >= minimum or p[2] <= maximum:
|
60 |
+
# direction_coordinates.append(p)
|
61 |
+
#
|
62 |
+
# else:
|
63 |
+
# if p[2] >= minimum and p[2] <= maximum:
|
64 |
+
# direction_coordinates.append(p)
|
65 |
+
# return direction_coordinates
|
66 |
+
#
|
67 |
+
#
|
68 |
+
# def get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum):
|
69 |
+
# direction_coordinates = []
|
70 |
+
# for p in coordinates:
|
71 |
+
# if direction in east:
|
72 |
+
# if p[2] >= minimum or p[2] <= maximum:
|
73 |
+
# direction_coordinates.append(p)
|
74 |
+
#
|
75 |
+
# else:
|
76 |
+
# if p[2] >= minimum and p[2] <= maximum:
|
77 |
+
# direction_coordinates.append(p)
|
78 |
+
# return direction_coordinates
|
79 |
+
#
|
80 |
+
#
|
81 |
+
# def get_central(coordinates, centroid, direction, is_midmid):
|
82 |
+
# n_min_max = get_min_max("north")
|
83 |
+
# n_coordinates = get_directional_coordinates_by_angle(coordinates, "north", n_min_max[0], n_min_max[1])
|
84 |
+
# n_mid1, n_mid2 = geoutil.get_midmid_point(centroid, n_coordinates[0], n_coordinates[-1], is_midmid)
|
85 |
+
#
|
86 |
+
# ne_min_max = get_min_max("north east")
|
87 |
+
# ne_coordinates = get_directional_coordinates_by_angle(coordinates, "north east", ne_min_max[0], ne_min_max[1])
|
88 |
+
# ne_mid1, ne_mid2 = geoutil.get_midmid_point(centroid, ne_coordinates[0], ne_coordinates[-1], is_midmid)
|
89 |
+
#
|
90 |
+
# e_min_max = get_min_max("east")
|
91 |
+
# e_coordinates = get_directional_coordinates_by_angle(coordinates, "east", e_min_max[0], e_min_max[1])
|
92 |
+
# e_mid1, e_mid2 = geoutil.get_midmid_point(centroid, e_coordinates[0], e_coordinates[-1], is_midmid)
|
93 |
+
#
|
94 |
+
# se_min_max = get_min_max("south east")
|
95 |
+
# se_coordinates = get_directional_coordinates_by_angle(coordinates, "south east", se_min_max[0], se_min_max[1])
|
96 |
+
# se_mid1, se_mid2 = geoutil.get_midmid_point(centroid, se_coordinates[0], se_coordinates[-1], is_midmid)
|
97 |
+
#
|
98 |
+
# s_min_max = get_min_max("south")
|
99 |
+
# s_coordinates = get_directional_coordinates_by_angle(coordinates, "south", s_min_max[0], s_min_max[1])
|
100 |
+
# s_mid1, s_mid2 = geoutil.get_midmid_point(centroid, s_coordinates[0], s_coordinates[-1], is_midmid)
|
101 |
+
#
|
102 |
+
# sw_min_max = get_min_max("south west")
|
103 |
+
# sw_coordinates = get_directional_coordinates_by_angle(coordinates, "south west", sw_min_max[0], sw_min_max[1])
|
104 |
+
# sw_mid1, sw_mid2 = geoutil.get_midmid_point(centroid, sw_coordinates[0], sw_coordinates[-1], is_midmid)
|
105 |
+
#
|
106 |
+
# w_min_max = get_min_max("west")
|
107 |
+
# w_coordinates = get_directional_coordinates_by_angle(coordinates, "west", w_min_max[0], w_min_max[1])
|
108 |
+
# w_mid1, w_mid2 = geoutil.get_midmid_point(centroid, w_coordinates[0], w_coordinates[-1], is_midmid)
|
109 |
+
#
|
110 |
+
# nw_min_max = get_min_max("north west")
|
111 |
+
# nw_coordinates = get_directional_coordinates_by_angle(coordinates, "north west", nw_min_max[0], nw_min_max[1])
|
112 |
+
# nw_mid1, nw_mid2 = geoutil.get_midmid_point(centroid, nw_coordinates[0], nw_coordinates[-1], is_midmid)
|
113 |
+
#
|
114 |
+
# central_coordindates = [e_mid1, e_mid2, ne_mid1, ne_mid2, n_mid1, n_mid2,
|
115 |
+
# nw_mid1, nw_mid2, w_mid1, w_mid2, sw_mid1, sw_mid2,
|
116 |
+
# s_mid1, s_mid2, se_mid1, se_mid2]
|
117 |
+
# return central_coordindates
|
118 |
+
#
|
119 |
+
#
|
120 |
+
# def get_min_max(direction):
|
121 |
+
# regex = regex_spatial.get_directional_regex()
|
122 |
+
# direction_list = regex.split("|")
|
123 |
+
# if direction in direction_list:
|
124 |
+
# if direction in east:
|
125 |
+
# return (337, 22)
|
126 |
+
# if direction in northeast:
|
127 |
+
# return (22, 67)
|
128 |
+
# if direction in north:
|
129 |
+
# return (67, 112)
|
130 |
+
# if direction in northwest:
|
131 |
+
# return (112, 157)
|
132 |
+
# if direction in west:
|
133 |
+
# return (157, 202)
|
134 |
+
# if direction in southwest:
|
135 |
+
# return (202, 247)
|
136 |
+
# if direction in south:
|
137 |
+
# return (247, 292)
|
138 |
+
# if direction in southeast:
|
139 |
+
# return (292, 337)
|
140 |
+
#
|
141 |
+
# return None
|
142 |
+
# def get_level1_coordinates(coordinates, centroid, direction, is_midmid):
|
143 |
+
# min_max = get_min_max(direction)
|
144 |
+
# if min_max is not None:
|
145 |
+
# coordinates, mid1, mid2 = get_directional_coordinates(coordinates, direction, centroid, min_max[0], min_max[1], is_midmid)
|
146 |
+
# return coordinates, centroid, mid1, mid2
|
147 |
+
# elif direction.lower() in center:
|
148 |
+
# return get_central(coordinates, centroid, direction, is_midmid), centroid, None, None
|
149 |
+
# else:
|
150 |
+
# return coordinates, centroid, None, None
|
151 |
+
def to_standard_2d_list(data):
    """Flatten ``data`` and regroup its values into a list of 2-element rows.

    Args:
        data: Any (possibly nested) array-like accepted by ``np.array``.

    Returns:
        A plain Python list of ``[a, b]`` pairs taken in flattened order.

    Raises:
        ValueError: If the total number of elements is odd, so the values
            cannot be split into pairs.
    """
    values = np.array(data).ravel()

    # Force everything to one dimension first; pairing only works when the
    # element count is a multiple of two.
    if values.size % 2:
        raise ValueError("元素个数不是2的倍数,不能 reshape 成 [N, 2] 格式")

    pairs = values.reshape(-1, 2)
    return pairs.tolist()
|
160 |
+
|
161 |
+
|
162 |
+
def get_geojson(ent, arr, centroid):
    """Wrap one ring of coordinates in a single-feature GeoJSON collection.

    Args:
        ent: Value stored as the feature ``id``.
        arr: Ring of coordinates; it becomes the only ring of the polygon
            (the same object is referenced, not copied).
        centroid: Value stored under ``properties['centroid']``.

    Returns:
        A ``FeatureCollection`` dict holding exactly one ``Polygon`` feature.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {
            'type': 'Polygon',
            'coordinates': [arr],
        },
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
|
180 |
+
|
181 |
+
|
182 |
+
def get_coordinates(ent):
    """Geocode ``ent`` with Nominatim and tag each boundary vertex with a bearing.

    NOTE(review): a second ``get_coordinates`` is defined later in this file
    and rebinds the name, so this version is not the one callers reach.

    Args:
        ent: Place name; it is concatenated into the query string as-is.

    Returns:
        ``(ring, centroid)`` where ``ring`` is the first coordinate ring of
        the top search hit, each vertex extended in place with the value
        returned by ``geoutil.calculate_bearing(centroid, vertex)``, and
        ``centroid`` is the hit's ``(lon, lat)`` as floats.
    """
    query_url = ''.join((
        'https://nominatim.openstreetmap.org/search.php?q= ',
        ent,
        '&polygon_geojson=1&accept-language=en&format=jsonv2',
    ))
    response = requests.get(
        query_url,
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
        },
        verify=False,
    )
    top_hit = json.loads(response.content)[0]

    ring = top_hit['geojson']['coordinates'][0]
    centroid = (float(top_hit['lon']), float(top_hit['lat']))

    # Append the bearing from the centroid as a third element of every vertex.
    for vertex in ring:
        vertex.append(geoutil.calculate_bearing(centroid, (vertex[0], vertex[1])))

    feature = get_geojson(ent, ring, centroid)['features'][0]
    return feature['geometry']['coordinates'][0], feature['properties']['centroid']
|
199 |
+
|
200 |
+
def get_coordinates(location):
    """Geocode ``location`` with Nominatim and return its outline and centre.

    Args:
        location: Free-text place name. It is sent as the ``q`` query
            parameter, so spaces and special characters are URL-encoded.

    Returns:
        ``(coordinates, centroid)``:

        * ``coordinates`` depends on the geometry type of the top hit:
          the exterior ring for a ``Polygon``; the ``[lon, lat]`` pair for a
          ``Point``; the exterior ring with the most vertices for a
          ``MultiPolygon``; and, for any other type, the centroid as a
          ``[lon, lat]`` pair so the place is treated as a point.
        * ``centroid`` is ``(lon, lat)`` of the top hit as floats.

    Raises:
        IndexError: If Nominatim returns no result for ``location``.
    """
    # TLS verification is left at the library default (enabled) and a timeout
    # is set so a stalled connection cannot hang the pipeline forever.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=30,
    )
    print(response.url)

    results = json.loads(response.content)
    if not results:
        raise IndexError(f"Nominatim returned no result for {location!r}")

    top_hit = results[0]
    centroid = (float(top_hit['lon']), float(top_hit['lat']))
    geometry = top_hit['geojson']
    geometry_type = geometry['type']

    if geometry_type == 'Polygon':
        coordinates = geometry['coordinates'][0]
    elif geometry_type == 'Point':
        coordinates = geometry['coordinates']
    elif geometry_type == 'MultiPolygon':
        # Each member is a polygon whose first ring is its exterior; use the
        # most detailed exterior as a stand-in for the main land mass.
        coordinates = max(
            (polygon[0] for polygon in geometry['coordinates']),
            key=len,
        )
    else:
        # Lines and other geometries have no single outline; report the type
        # and fall back to the centroid instead of failing on an unbound name.
        print(geometry_type)
        coordinates = list(centroid)

    return (coordinates, centroid)
|
216 |
+
|
217 |
+
|
218 |
+
# level3
|
219 |
+
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in an angular range.

    Args:
        coordinates: Iterable of points; only the first two items of each
            point are copied to the output.
        centroid: Reference point passed to ``geoutil.calculate_bearing``.
        direction: Direction name; decides how the range is tested.
        minimum: Lower bound of the range (inclusive).
        maximum: Upper bound of the range (inclusive).

    Returns:
        A list of ``(x, y, bearing)`` tuples for the points inside the range,
        in input order.
    """
    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        tagged = (point[0], point[1], bearing)

        if direction in geo_level1.east:
            # East is the only direction tested with OR, i.e. as a range that
            # wraps around the end of the angle scale (minimum > maximum).
            inside = bearing >= minimum or bearing <= maximum
        else:
            inside = minimum <= bearing <= maximum

        if inside:
            selected.append(tagged)

    return selected
|
239 |
+
def get_level3(level3):
    """Split a distance phrase such as ``"4 km"`` into number and unit strings.

    Args:
        level3: Text containing a number and a unit.

    Returns:
        ``(digits, unit)``: the first run of ASCII digits and the first run of
        ASCII letters found in ``level3``, both as strings. Only whole digit
        runs are recognised, so ``"2.5 km"`` yields ``"2"``.

    Raises:
        IndexError: If ``level3`` contains no digit or no letter.
    """
    digit_runs = re.findall('[0-9]+', level3)
    amount = digit_runs[0]
    letter_runs = re.findall('[A-Za-z]+', level3)
    return amount, letter_runs[0]
|
243 |
+
|
244 |
+
def get_direction_coordinates(coordinates, centroid, level1):
    """Restrict ``coordinates`` to the angular sector of direction ``level1``.

    Args:
        coordinates: Points to filter.
        centroid: Reference point the bearings are measured from.
        level1: Direction name looked up with ``geo_level1.get_min_max``.

    Returns:
        The filtered ``(x, y, bearing)`` tuples when the direction has a
        known sector; otherwise ``coordinates`` unchanged.
    """
    sector = geo_level1.get_min_max(level1)
    if sector is None:
        # Unknown direction: nothing to clip.
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, sector[0], sector[1]
    )
|
250 |
+
def sort_west(poly1, poly2, centroid):
    """Collect the vertices of two geometries that fall in the 157-202 bearing window.

    Args:
        poly1: First geometry; ``mapping(poly1)`` must expose a feature
            collection whose first feature carries the coordinate rings.
        poly2: Second geometry, same shape as ``poly1``.
        centroid: Reference point passed to ``geoutil.calculate_bearing``.

    Returns:
        A list of ``(x, y, bearing)`` tuples: the matching vertices of
        ``poly1`` (each ring traversed in reverse) followed by those of
        ``poly2``.
    """
    rings_a = mapping(poly1)["features"][0]["geometry"]["coordinates"]
    rings_b = mapping(poly2)["features"][0]["geometry"]["coordinates"]

    def _reversed_vertices(rings):
        # Flatten all rings into one list, reversing each ring on the way.
        return [vertex for ring in rings for vertex in ring[::-1]]

    def _in_window(points):
        kept = []
        for point in points:
            bearing = geoutil.calculate_bearing(centroid, point)
            if 157 <= bearing <= 202:
                kept.append((point[0], point[1], bearing))
        return kept

    points_a = _reversed_vertices(rings_a)
    points_b = _reversed_vertices(rings_b)

    western = _in_window(points_a)
    western.extend(_in_window(points_b))
    return western
|
274 |
+
|
275 |
+
|
276 |
+
def get_level3_coordinates(coordinates, level_3, level1):
    """Build the region lying a given distance (and direction) from a place.

    Args:
        coordinates: ``(coords, center)`` pair. ``coords`` is either a list of
            at least three vertices (treated as a polygon) or anything else
            (treated as a single point located at ``center``).
        level_3: Distance phrase such as ``"4 km"``.
        level1: Direction name, or ``None`` for no direction.

    Returns:
        ``(points, center)``:

        * Point input: 100 points on a 1 km circle whose centre is ``center``
          moved by the requested distance in the requested direction, plus
          the centroid of those points.
        * Polygon input: the vertices of a ring-shaped band around the
          polygon, clipped to the direction sector when ``level1`` is given,
          plus the centroid of the kept vertices (or the input centre when
          nothing is kept).
    """
    distance, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(distance, unit)
    coord = []

    coords0, center = coordinates

    if not isinstance(coords0, list) or len(coords0) < 3:
        # Point input: shift the centre by `kms` in the requested direction.
        lat_km = 111.32  # approximate kilometres per degree of latitude
        lon_km = 111.32 * np.cos(np.radians(center[1]))

        dx = dy = 0

        if level1 is not None:
            if level1 in geo_level1.east:
                dx = kms / lon_km
            elif level1 in geo_level1.west:
                dx = -kms / lon_km
            elif level1 in geo_level1.north:
                dy = kms / lat_km
            elif level1 in geo_level1.south:
                dy = -kms / lat_km
            else:
                # Compound directions ("northeast", "south west", ...) used to
                # be ignored here, leaving the circle on the input point.
                unit_east, unit_north = _compound_direction_unit_vector(level1)
                dx = unit_east * kms / lon_km
                dy = unit_north * kms / lat_km

        new_center = (center[0] + dx, center[1] + dy)

        # Draw a circle of fixed radius (1 km) around the shifted centre.
        r_km = 1

        circle_points = []
        for theta in np.linspace(0, 360, num=100):
            theta_rad = np.radians(theta)
            d_lat = (np.sin(theta_rad) * r_km) / lat_km
            d_lon = (np.cos(theta_rad) * r_km) / lon_km
            circle_points.append((new_center[0] + d_lon, new_center[1] + d_lat))

        # The reported centre is the centroid of the generated points.
        center_point = MultiPoint(circle_points).centroid
        center = (center_point.x, center_point.y)

        return circle_points, center

    # Regular polygon flow.
    poly1 = Polygon(coords0)
    polygon1 = gpd.GeoSeries(poly1)

    # Ring-shaped band: outer buffer minus inner buffer.
    # NOTE(review): the factors look like degrees per km (1 km is roughly
    # 0.009 degrees of latitude) - confirm.
    poly2 = polygon1.buffer(0.0095 * kms, join_style=2)
    poly3 = polygon1.buffer(0.013 * kms, join_style=2)
    poly = poly3.difference(poly2)

    # Flatten every ring of the band (each ring reversed) into one list.
    coords = mapping(poly)["features"][0]["geometry"]["coordinates"]
    for c in coords:
        pol = list(c[::-1])
        coord.extend(pol)

    # Clip to the requested direction.
    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West is rebuilt from the two buffers rather than from the band.
            coord = sort_west(poly3, poly2, coordinates[1])

    # Centroid of the kept vertices, falling back to the input centre.
    if coord:
        center_point = MultiPoint(coord).centroid
        center = (center_point.x, center_point.y)
    else:
        center = coordinates[1]

    return coord, center


def _compound_direction_unit_vector(direction):
    """Return the ``(east, north)`` unit vector named by a direction string.

    The name is matched case-insensitively on the words north/south/east/west,
    so "northeast", "north east" and "North-East" all give the same diagonal.
    Anything without such a word (or a non-string) yields ``(0.0, 0.0)``.
    """
    if not isinstance(direction, str):
        return 0.0, 0.0
    name = direction.lower()
    east = ('east' in name) - ('west' in name)
    north = ('north' in name) - ('south' in name)
    length = (east * east + north * north) ** 0.5
    if not length:
        return 0.0, 0.0
    return east / length, north / length
|
352 |
+
# level 3 end
|
353 |
+
|
354 |
+
# between
|
355 |
+
def get_between_coordinates(coordinates1, coordinates2):
    """Return a circular region centred midway between two places.

    Each input is a ``(coords, center)`` pair. A ``coords`` value that is not
    a list of at least three vertices is treated as a point with area 0.
    When both inputs are points the circle radius defaults to 2 km; otherwise
    the radius is that of a circle whose area equals the mean of the two
    polygon areas, converted with 111.32 km per degree.

    :param coordinates1: boundary coordinates and centre of the first region
    :param coordinates2: boundary coordinates and centre of the second region
    :return: ``(circle_points, midpoint)`` - 100 ``(lon, lat)`` points on the
        circle and the midpoint of the two centres
    """

    def _area_or_zero(ring):
        # Only a list with three or more vertices counts as a polygon.
        if isinstance(ring, list) and len(ring) >= 3:
            return Polygon(ring).area
        return 0

    ring_a, centre_a = coordinates1
    ring_b, centre_b = coordinates2

    area_a = _area_or_zero(ring_a)
    area_b = _area_or_zero(ring_b)

    # Midpoint of the two centres.
    midpoint = (
        (centre_a[0] + centre_b[0]) / 2,
        (centre_a[1] + centre_b[1]) / 2
    )

    if area_a == 0 and area_b == 0:
        # Two bare points: fall back to the default 2 km radius.
        r_km = 2
    else:
        mean_area = (area_a + area_b) / 2
        r_km = np.sqrt(mean_area / np.pi) * 111.32  # approximate radius in km

    # Degree-to-kilometre conversion factors at the midpoint latitude.
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    def _point_at(theta):
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

    # 100 samples from 0 to 360 degrees inclusive.
    circle_points = [_point_at(theta) for theta in np.linspace(0, 360, num=100)]

    return circle_points, midpoint
|
410 |
+
# between end
|
411 |
+
|
412 |
+
|
413 |
+
def llmapi(text):
    """Ask the chat model (Chinese prompt) for a plan of positioning steps.

    NOTE(review): a second ``llmapi`` with an English prompt is defined right
    after this one and rebinds the name, so this version is not reached.

    Args:
        text: Natural-language description of the target location.

    Returns:
        The decoded JSON found between the ``<<<JSON>>>`` and ``<<<END>>>``
        markers of the model reply.

    Raises:
        ValueError: If the reply does not contain the marker-wrapped block.
    """
    system_prompt = (
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
        "你能选择的定位函数有:\n"
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
        "示例输出:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        model=model,
    )
    reply = chat_completion.choices[0].message.content

    # The plan is expected on its own lines between the two markers.
    payload = re.search(r'<<<JSON>>>\n(.*?)\n<<<END>>>', reply, re.DOTALL)
    if payload is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(payload.group(1))
|
449 |
+
def llmapi(text):
    """Ask the chat model for a plan of positioning steps and decode it.

    The system prompt describes two functions (Relative and Between) and asks
    for chain-of-thought reasoning followed by a JSON list wrapped between
    ``<<<JSON>>>`` and ``<<<END>>>``.

    Args:
        text: Natural-language description of the target location.

    Returns:
        The decoded JSON plan: a list of step dicts with ``id``, ``function``
        and ``inputs`` keys, as requested by the prompt.

    Raises:
        ValueError: If the reply has no marker-wrapped block, or the block is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        # This fragment previously lacked its trailing newline, gluing it to
        # the next sentence ("...situations.First, perform...").
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    result = chat_completion.choices[0].message.content
    print(result)

    # The opening marker must still end its line (so an inline mention of the
    # markers inside the reasoning is not mistaken for the payload), but CRLF
    # line endings and stray whitespace around the markers are tolerated.
    json_match = re.search(
        r'<<<JSON>>>[ \t]*\r?\n(.*?)\s*<<<END>>>', result, re.DOTALL
    )
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
|
486 |
+
|
487 |
+
|
488 |
+
|
489 |
+
|
490 |
+
|
491 |
+
def execute_steps(steps):
    """Run a plan of positioning steps in order and return every result.

    Args:
        steps: Iterable of dicts with ``id``, ``function`` and ``inputs``.
            An integer input refers to the result of the earlier step with
            that id; a string location is geocoded with ``get_coordinates``.
            ``function`` is ``"Relative"`` (inputs: location, direction,
            distance) or ``"Between"`` (inputs: location1, location2); steps
            with any other function name produce no result.

    Returns:
        Dict mapping each executed step id to its ``(points, center)`` result.

    Raises:
        KeyError: If a step refers to an id that has no stored result.
    """

    def _geocode(name):
        # Normalise the geocoder outline to a list of [lon, lat] pairs so a
        # bare point and a polygon ring share one shape.
        raw = get_coordinates(name)
        return [to_standard_2d_list(raw[0])] + list(raw[1:])

    data = {}

    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        # Replace step references (ints) by the stored result of that step.
        resolved_inputs = [
            data[inp] if isinstance(inp, int) else inp
            for inp in inputs
        ]

        if function == "Relative":
            location, direction, distance = resolved_inputs
            if isinstance(location, str):
                location = _geocode(location)
            # A hard-coded debug location used to overwrite `location` here,
            # so every Relative step ignored its actual input.
            data[step_id] = get_level3_coordinates(location, distance, direction)

        elif function == "Between":
            location1, location2 = resolved_inputs
            if isinstance(location1, str):
                location1 = _geocode(location1)
            if isinstance(location2, str):
                location2 = _geocode(location2)
            data[step_id] = get_between_coordinates(location1, location2)

    return data
|
539 |
+
|
540 |
+
|
541 |
+
|
542 |
+
if __name__ == '__main__':
    # Manual checks of the individual helpers:
    # a = get_coordinates('Burwood')
    # a2 = get_coordinates('Glebe')
    # b = get_level3_coordinates(a, '5 km', 'east')
    # c = get_between_coordinates(a, a2)

    # --- Full pipeline ---
    # Sample inputs
    # default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
    # default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
    # default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
    # default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
    # default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."

    # Parse the LLM result
    # parsed_steps = llmapi(default_input_text)
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Chatswood', 'south', '4 km']}, {'id': 2, 'function': 'Relative', 'inputs': ['North Sydney', 'east', '2 km']}, {'id': 3, 'function': 'Between', 'inputs': [1, 2]}, {'id': 4, 'function': 'Relative', 'inputs': [3, 'south west', '5 km']}]
    # parsed_steps = [{"id": 1, "function": "Between", "inputs": ["Chatswood", "North Sydney"]},{"id": 2, "function": "Between", "inputs": [1, "North Sydney"]}]
    # parsed_steps = [{"id": 1, "function": "Relative", "inputs": ["Katoomba", "southeast", "3 km"]}, {"id": 2, "function": "Between", "inputs": [1, "Echo Point"]}]
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Scafell Pike', 'east', '9 km']}]
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Colosseum', 'northeast', '8 km']}, {'id': 2, 'function': 'Relative', 'inputs': [1, 'northeast', '2 km']}]
    parsed_steps = [
        {"id": 1, "function": "Between", "inputs": ["Statue of Liberty", "Eiffel Tower"]},
        {"id": 2, "function": "Relative", "inputs": [1, "west", "8 km"]},
    ]

    # Execute the steps
    result = execute_steps(parsed_steps)

    # Print every step result, then the points of the highest-numbered step
    print(result)
    print('-' * 100)
    final_step_id = max(result)
    print(result[final_step_id][0])
    # --- End of pipeline ---

    # location = get_coordinates('Chatswood')
    # result = get_level3_coordinates(location, '4 km', 'north west')
    # print(result)
|
581 |
+
|
582 |
+
|
utils/llm_ent_extract.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
import re
|
3 |
+
from utils.config import api_key
|
4 |
+
|
5 |
+
# Shared OpenAI client, authenticated with the key imported from utils.config.
client = OpenAI(api_key=api_key)

# Chat model used by every helper in this module. An earlier assignment of
# "gpt-3.5-turbo" was immediately overwritten by this value.
model = "gpt-4o"
|
11 |
+
|
12 |
+
def extract_GPE(text):
    """Ask the chat model to list the geopolitical entities mentioned in ``text``.

    Args:
        text: Sentence or passage to analyse.

    Returns:
        The raw model reply; the prompt requests the form
        ``[###ENTITY1###, ###ENTITY2###]``.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all geopolitical entities from a given text. Geopolitical entities can include countries, regions, cities, autonomous regions, or other administrative divisions. For each geopolitical entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"China and India are two of the most populous countries in Asia."\n\nExpected Output:\n[###China###, ###India###]'''

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(messages=conversation, model=model)
    return completion.choices[0].message.content
|
29 |
+
|
30 |
+
|
31 |
+
def extract_LOC(text):
    """Ask the chat model to list the physical location entities in ``text``.

    The prompt asks for landmarks and geographical features and explicitly
    excludes political or administrative divisions.

    Args:
        text: Sentence or passage to analyse.

    Returns:
        The raw model reply; the prompt requests the form
        ``[###ENTITY1###, ###ENTITY2###]``.
    """
    # The few-shot example used to be the GPE one (China, India), which
    # contradicts the instruction to exclude countries; it now shows
    # physical features only.
    system_prompt = '''You are a professional geographer. Your task is to extract all location entities (LOC) from a given text. Location entities can include physical locations such as landmarks, geographical features, mountains, rivers, oceans, and places, but do not include political or administrative divisions such as countries or cities (these are considered geopolitical entities). For each location entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The Colorado River carved the Grand Canyon, far to the south of the Rocky Mountains."\n\nExpected Output:\n[###Colorado River###, ###Grand Canyon###, ###Rocky Mountains###]'''

    math_bot_messages = [
        {"role": "system",
         "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=math_bot_messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
|
48 |
+
|
49 |
+
|
50 |
+
def extract_RSE_1(text):
    """Ask the chat model to list the directional keywords found in ``text``.

    The prompt covers compass directions (north, southeast, ...) and terms
    such as center, central, downtown and midtown.

    Args:
        text: Sentence or passage to analyse.

    Returns:
        The raw model reply; the prompt requests the form
        ``[###ENTITY1###, ###ENTITY2###]``.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all spatial entities (directional keywords) from a given text. Spatial entities can include directional keywords such as north, south, east, west, and more specific terms like northeast, northwest, southeast, southwest, as well as terms indicating locations like center, central, downtown, and midtown. For each spatial entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The hotel is located in the downtown area of New York, just south of Central Park, with a beautiful view of the southeast corner."\n\nExpected Output:\n[###downtown###, ###south###, ###southeast###]'''

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(messages=conversation, model=model)
    return completion.choices[0].message.content
|
67 |
+
|
68 |
+
|
69 |
+
def extract_RSE_2(text):
    """Ask the chat model to list the fuzzy proximity keywords found in ``text``.

    The prompt covers terms such as nearby, close, beside, adjacent and
    vicinity.

    Args:
        text: Sentence or passage to analyse.

    Returns:
        The raw model reply; the prompt requests the form
        ``[###ENTITY1###, ###ENTITY2###]``.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy spatial entities (keywords) from a given text. Fuzzy spatial keywords can include terms like nearby, near, vicinity, close, beside, next, adjacent, immediate, border, surrounding, neighbourhood, proximity, territory, locality, and similar terms. For each fuzzy spatial keyword, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located nearby the lake, with several cafes close to the walking paths, and a small garden adjacent to the main entrance."\n\nExpected Output:\n[###nearby###, ###close###, ###adjacent###]'''

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(messages=conversation, model=model)
    return completion.choices[0].message.content
|
86 |
+
|
87 |
+
|
88 |
+
def extract_RSE_3(text):
    """Ask the chat model to list the distance expressions found in ``text``.

    The prompt asks for a numeric value together with its unit (km, miles,
    meters, ...), wrapped as one entity.

    Args:
        text: Sentence or passage to analyse.

    Returns:
        The raw model reply; the prompt requests the form
        ``[###ENTITY1###, ###ENTITY2###]``.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy distance keywords from a given text. Fuzzy distance keywords include numeric values followed by distance units such as kilometer, mile, meter, foot, inch, centimeter, and other related units. The distance units can be in different formats, such as km, m, mi, ft, yd, cm, mm, or even in full words like kilometer, mile, or inch. For each fuzzy distance keyword, wrap the entire expression (number and unit) in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located 3 km away from the city center, while the nearest supermarket is only 500 meters from here, and the lake is about 1 mile further down the road."\n\nExpected Output:\n[###3 km###, ###500 meters###, ###1 mile###]'''

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(messages=conversation, model=model)
    return completion.choices[0].message.content
|
105 |
+
|
106 |
+
|
107 |
+
def extract(respond, entity_type):
    """Map every ``###``-wrapped entity in ``respond`` to ``entity_type``.

    :param respond: string containing the entities, e.g.
        ``'[###2 miles###, ###200 meters###, ###5 kilometers###]'``
    :param entity_type: type label assigned to every extracted entity
    :return: dict whose keys are the extracted entities (surrounding
        whitespace stripped) and whose values are all ``entity_type``
    """
    labelled = {}
    # Non-greedy match so each ###...### pair yields its own entity.
    for raw_name in re.findall(r'###(.*?)###', respond):
        labelled[raw_name.strip()] = entity_type
    return labelled
|
122 |
+
|
123 |
+
|
124 |
+
def llmapi(text):
    """Send ``text`` to the chat model with a placeholder system prompt.

    The system prompt literal is a placeholder asking the maintainer to fill
    in the required prompt.

    Args:
        text: User message to send.

    Returns:
        The content of the first completion choice.
    """
    system_prompt = '请你填入需要的提示'
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(messages=conversation, model=model)
    return completion.choices[0].message.content
|
139 |
+
|
140 |
+
|
141 |
+
# print(extract('[###2 miles###, ###200 meters###, ###5 kilometers###]', 'rse'))
|
142 |
+
# print(extract(extract_GPE('Between Burwood and Glebe.'), 'gpe'))
|
143 |
+
|
144 |
+
|
145 |
+
|