Shunfeng Zheng committed on
Commit
db11cc9
·
verified ·
1 Parent(s): 04bf5ef

Upload 12 files

Browse files
geospacy.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from spacy.tokens import Span
2
+ from spacy.tokens import Doc
3
+ from spacy.tokens import Token
4
+ import regex_spatial
5
+ from spacy.language import Language
6
+ import re
7
+ from utils import llm_ent_extract
8
+
9
# Module-level accumulator for the identifier string of the entity currently
# being expanded. find_ent_by_regex() and get_relative_entity() rebind it via
# ``global id``. NOTE: inside this module the name shadows the ``id`` builtin.
id = ""

# Name of the custom spaCy extension attribute registered by set_extension().
rse_id = "rse_id"
11
def set_extension():
    """Register the ``rse_id`` custom attribute on spaCy Span, Doc and Token.

    The attribute defaults to an empty string; ``force=True`` lets the
    registration be repeated without raising.
    """
    for container in (Span, Doc, Token):
        container.set_extension(rse_id, default="", force=True)
15
+
16
def get_level1(doc, sentence, ent):
    """Expand ``ent`` with the first level-1 pattern match found in ``sentence``."""
    pattern = regex_spatial.get_level1_regex()
    return find_ent_by_regex(doc, sentence, ent, pattern)
18
+
19
def get_level2(doc, sentence, ent):
    """Expand ``ent`` with the first level-2 pattern match found in ``sentence``."""
    pattern = regex_spatial.get_level2_regex()
    return find_ent_by_regex(doc, sentence, ent, pattern)
21
+
22
def get_level3(doc, sentence, ent):
    """Expand ``ent`` with the first level-3 pattern match found in ``sentence``."""
    pattern = regex_spatial.get_level3_regex()
    return find_ent_by_regex(doc, sentence, ent, pattern)
24
+
25
+
26
def find_ent_by_regex(doc, sentence, ent, regex):
    """Stretch ``ent`` over the first ``regex`` match that starts inside ``sentence``.

    The pattern is searched over the whole ``doc.text``; only matches whose
    start offset lies within ``[sentence.start_char, sentence.end_char]`` and
    that align with token boundaries (``doc.char_span`` is not None) count.
    For the first such match the matched text is prepended to the module-level
    ``id`` (``"<match>_<id>"``) and ``ent`` is widened: its ``end_char`` is
    moved to the match end when the match starts after the entity, otherwise
    its ``start_char`` is moved to the match start.

    :param doc: spaCy Doc that owns ``ent``.
    :param sentence: span whose character range limits the accepted matches.
    :param ent: entity span; mutated in place and returned.
    :param regex: pattern string handed to ``re.finditer``.
    :return: ``ent`` (possibly widened).
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # de-indented listing -- confirm that the early return belongs to the
    # "aligned match found" branch.
    global id

    if id == "":
        id = ent.text

    for hit in re.finditer(regex, doc.text):
        begin, stop = hit.span()
        if not (begin >= sentence.start_char and begin <= sentence.end_char):
            continue

        matched = doc.char_span(begin, stop)
        if matched is None:
            # Match does not line up with token boundaries; try the next one.
            continue

        id = matched.text + "_" + id
        if begin > ent.end_char:
            ent.end_char = stop
        else:
            ent.start_char = begin
        return ent

    return ent
45
+
46
+
47
def update_entities(doc, entity_texts, replace=True):
    """
    Annotate entities by their surface text and write them to ``doc.ents``.

    Only the first occurrence of each text in ``doc.text`` is annotated.
    Character offsets are mapped to tokens with ``doc.char_span`` so that the
    span agrees with spaCy's tokenisation (counting whitespace-separated words
    drifts as soon as punctuation is split into its own token). Overlapping
    spans are resolved with ``spacy.util.filter_spans`` because ``set_ents``
    rejects overlaps.

    :param doc: spaCy Doc to annotate (modified in place).
    :param entity_texts: dict mapping entity text to its entity label.
    :param replace: True discards the existing entities; False keeps them and
        adds the new ones.
    """
    from spacy.util import filter_spans

    new_ents = [] if replace else list(doc.ents)

    for ent_text, ent_label in entity_texts.items():
        if not ent_text:
            # An empty key would "match" at offset 0 and produce an empty span.
            continue
        start = doc.text.find(ent_text)
        if start == -1:
            continue
        # "expand" snaps to whole tokens when the text ends inside a token.
        span = doc.char_span(
            start,
            start + len(ent_text),
            label=ent_label,
            alignment_mode="expand",
        )
        if span is not None:
            new_ents.append(span)

    doc.set_ents(filter_spans(new_ents))
68
+
69
+
70
def get_relative_entity(doc, sentence, ent):
    """Build the final entity span for ``ent`` and attach its ``rse_id``.

    Resets the module-level ``id``, runs the level-1, level-2 and level-3
    expansions in that order, then re-creates the span from the resulting
    character offsets. When any level matched (``id`` contains ``"_"``) the
    span is labelled ``"RSE"``; otherwise the original label is kept.

    :param doc: spaCy Doc that owns ``ent``.
    :param sentence: span limiting where spatial keywords are searched.
    :param ent: entity span to expand (mutated by the level helpers).
    :return: span carrying ``._.rse_id``.
    """
    global id

    id = ""
    rel_entity = get_level1(doc, sentence, ent)
    rel_entity = get_level2(doc, sentence, rel_entity)
    rel_entity = get_level3(doc, sentence, rel_entity)

    if "_" in id:
        source, label = rel_entity, "RSE"
    else:
        source, label = ent, ent.label_

    # char_span returns None when the offsets cannot be mapped to tokens;
    # "expand" snaps to whole tokens, and the original entity is the fallback
    # so the attribute assignment below can never hit None.
    span = doc.char_span(
        source.start_char, source.end_char, label, alignment_mode="expand"
    )
    if span is None:
        span = ent

    span._.rse_id = id
    return span
93
+
94
@Language.component("spatial_pipeline")
def get_spatial_ent(doc):
    """spaCy pipeline component that replaces ``doc.ents`` with spatial entities.

    Steps:
      1. register the ``rse_id`` extension;
      2. ask ``llm_ent_extract`` for the GPE entities of ``doc.text`` and
         write them to ``doc.ents`` (replacing whatever was there);
      3. for every entity, build a search window and let
         ``get_relative_entity`` expand the entity inside that window;
      4. assign the expanded spans back to ``doc.ents``.

    :param doc: spaCy Doc; ``ent.sent`` is used, so sentence boundaries must
        be available.
    :return: the same Doc with updated entities.
    """
    import logging

    log = logging.getLogger(__name__)

    set_extension()

    # Entities come from the LLM extractor rather than from spaCy's own NER.
    raw_gpe = llm_ent_extract.extract_GPE(doc.text)
    log.debug("LLM input: %s", doc.text)
    log.debug("LLM raw output: %s", raw_gpe)

    gpe = llm_ent_extract.extract(raw_gpe, 'GPE')
    log.debug("LLM parsed entities: %s", gpe)
    update_entities(doc, gpe, True)
    ents = doc.ents
    log.debug("doc.ents after update: %s", ents)

    # Built once; the keyword list is the same for every entity.
    keywords = set(regex_spatial.get_keywords())

    new_ents = []
    end = None
    for ent in ents:
        if ent.end != len(doc):
            next_token = doc[ent.end]
            # The window starts where the previous entity's window ended, or
            # at the sentence start for the first entity.
            start = end if end is not None else ent.sent.start
            # NOTE(review): next_token.i equals ent.end, so both branches
            # currently produce the same value -- confirm the intended window.
            if next_token.text.lower() in keywords:
                end = next_token.i
            else:
                end = ent.end
        else:
            start = ent.sent.start
            end = ent.end

        rsi_ent = get_relative_entity(doc, Span(doc, start, end), ent)
        new_ents.append(rsi_ent)

    doc.ents = new_ents
    return doc
160
+
161
+ # def update_doc_ents(doc, new_dict):
162
+ # """
163
+ # 更新 doc.ents, 将新的实体文本和标签添加到 doc 中。
164
+ #
165
+ # 参数:
166
+ # - doc: spaCy 的 Doc 对象
167
+ # - new_dict: 一个字典,键是实体文本,值是标签
168
+ # """
169
+ # modified_ents = []
170
+ #
171
+ # # 遍历字典中的实体文本和标签
172
+ # for ent_text, label in new_dict.items():
173
+ # # 将实体文本拆分成单词
174
+ # ent_words = ent_text.split()
175
+ #
176
+ # # 遍历 doc 中的 token 来查找第一个单词
177
+ # start = None
178
+ # for i in range(len(doc)):
179
+ # # 如果当前 token 和实体的第一个单词匹配,确定 start
180
+ # if doc[i].text == ent_words[0]:
181
+ # start = i
182
+ # # 然后检查后续的单词是否都匹配
183
+ # end = start + len(ent_words) # 计算 end 为 start + 单词数
184
+ # if all(doc[start + j].text == ent_words[j] for j in range(len(ent_words))):
185
+ # # 创建 Span 对象
186
+ # new_ent = Span(doc, start, end, label=label)
187
+ # modified_ents.append(new_ent)
188
+ # break # 找到匹配后跳出循环
189
+ #
190
+ # # 使用 doc.set_ents() 更新 doc.ents
191
+ # doc.set_ents(modified_ents)
192
+ #
193
+ #
194
+ # # def llm_extract(doc, model):
195
+ #
196
+ # def split_doc_into_sentences(doc):
197
+ # """
198
+ # 将 doc 的文本按句子分割,并返回每个句子的字符串列表。
199
+ # """
200
+ # sentence_list = [sent.text.strip() for sent in doc.sents]
201
+ # return sentence_list
202
+ #
203
+ #
204
+ # @Language.component("spatial_pipeline")
205
+ # def get_spatial_ent(doc):
206
+ #
207
+ # set_extension()
208
+ #
209
+ # split_sent = split_doc_into_sentences(doc)
210
+ # for i in range(len(split_sent)):
211
+ # gpe_dict = llm_ent_extract.extract_GPE(split_sent[i])
212
+ # loc_dict = llm_ent_extract.extract_LOC(split_sent[i])
213
+ # new_dict = gpe_dict|loc_dict
214
+ #
215
+ #
216
+ # print(gpe_dict, '111')
217
+ # print(loc_dict)
218
+ # print(new_dict)
219
+ # # new_dict = {'pp': 'ORG', 'France': 'GPE', 'Paris': 'GPE'}
220
+ #
221
+ #
222
+ # # 调用新的函数更新 doc 的实体
223
+ # update_doc_ents(doc, new_dict)
224
+ #
225
+ # # 继续处理 doc.ents
226
+ # ents = [ent for ent in doc.ents if ent.label_ == "GPE" or ent.label_ == "LOC"]
227
+ # print(ents[1].label_)
228
+ #
229
+ # end = None
230
+ # new_ents = []
231
+ #
232
+ # for ent in ents:
233
+ # if ent.end != len(doc):
234
+ # next_token = doc[ent.end + 1]
235
+ # if end is not None:
236
+ # start = end
237
+ # else:
238
+ # start = ent.sent.start
239
+ # if next_token.text.lower() in regex_spatial.get_keywords():
240
+ # end = next_token.i
241
+ # else:
242
+ # end = ent.end
243
+ # else:
244
+ # start = ent.sent.start
245
+ # end = ent.end
246
+ #
247
+ # # 调用 get_relative_entity 来获得新的实体信息
248
+ # rsi_ent = get_relative_entity(doc, Span(doc, start, end), ent)
249
+ #
250
+ # # 将处理后的实体添加到新的实体列表中
251
+ # new_ents.append(rsi_ent)
252
+ #
253
+ # doc.ents = new_ents # 更新 doc.ents
254
+ # print(new_ents, '111222')
255
+ #
256
+ # return doc
regex_spatial.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Jul 26 14:57:18 2022
5
+
6
+ @author: syed
7
+ """
8
+
9
+ from quantities import units as u
10
+ from quantities import Quantity
11
+
12
# Regex quantifier suffixes appended to sub-patterns by get_level3_regex().
one_plus = "+"   # one or more repetitions
zero_plus = "*"  # zero or more repetitions
14
+
15
+
16
def get_quantities_regex():
    """Return a regex fragment matching any length-unit name known to ``quantities``.

    Unit names are the attributes of ``quantities.units.length`` that are
    ``Quantity`` instances and contain no underscore. They are emitted as a
    non-capturing alternation ``(?:...)``; wrapping the ``|``-joined names in
    ``[...]`` would build a character class that matches any single letter
    occurring in any unit name. Longer names come first so that e.g. a plural
    form wins over its one-letter abbreviation.
    """
    names = [
        name
        for name in dir(u.length)
        if "_" not in name and isinstance(getattr(u.length, name), Quantity)
    ]
    names.sort(key=len, reverse=True)
    return "(?:" + "|".join(names) + ")"
24
def get_number_regex():
    """Return the regex character class for a single decimal digit."""
    return "[0-9]"
27
def get_space_regex():
    """Return the regex token for a single whitespace character.

    A raw string is used: ``"\\s"`` written in a normal string literal is an
    invalid escape sequence and triggers a warning on current Python versions.
    """
    return r"\s"
30
+
31
def get_directional_regex():
    """Return the ``|``-separated alternation of direction keywords.

    Compound directions come first, then the abbreviated symbols, then the
    four cardinal words, so the longer spellings are tried before the
    cardinals they contain.
    """
    ordinals = (
        "north-east", "north-west", "south-east", "south-west",
        "north east", "north west", "south east", "south west",
        "northeast", "northwest", "southeast", "southwest",
    )
    symbols = ("N'", "S'", "E'", "W'", "NE'", "NW'", "SE'", "SW'")
    cardinals = ("north", "south", "east", "west")
    return "|".join(ordinals + symbols + cardinals)
36
+
37
def get_center_regex():
    """Return the ``|``-separated alternation of "centre of a place" keywords."""
    return "|".join(("center", "central", "downtown", "midtown"))
40
+
41
def get_near_regex():
    """Return the ``|``-separated alternation of proximity keywords."""
    words = (
        "nearby", "near", "vicinity", "close", "beside",
        "next", "adjacent", "immediate", "border",
    )
    return "|".join(words)
44
+
45
def get_surrounding_regex():
    """Return the ``|``-separated alternation of "surrounding area" keywords.

    The correctly spelt "neighbourhood" / "neighborhood" are included; the
    historical misspelling "neigbourhood" is kept so that anything keyed on
    the old keyword list keeps working.
    """
    words = (
        "surrounding",
        "neighbourhood",
        "neighborhood",
        "neigbourhood",  # legacy misspelling, retained for compatibility
        "proximity",
        "territory",
        "locality",
    )
    return "|".join(words)
48
def get_level1_regex():
    """Return the case-insensitive level-1 pattern: direction or centre keywords."""
    return f"(?i)({get_directional_regex()}|{get_center_regex()})"
51
+
52
def get_level2_regex():
    """Return the case-insensitive level-2 pattern: proximity or surrounding keywords."""
    return f"(?i)({get_near_regex()}|{get_surrounding_regex()})"
55
+
56
def get_level3_regex():
    """Return the case-insensitive level-3 pattern: digits, optional spaces, unit."""
    number = get_number_regex() + one_plus
    gap = get_space_regex() + zero_plus
    unit = get_quantities_regex() + one_plus
    return "(?i)(" + number + gap + unit + ")"
59
+
60
+
61
+
62
def get_keywords():
    """Return every spatial keyword as a flat list, plus ``","``, ``"and"`` and ``"."``.

    Order: directional, near, surrounding, centre keywords, then the three
    connector tokens.
    """
    groups = (
        get_directional_regex(),
        get_near_regex(),
        get_surrounding_regex(),
        get_center_regex(),
    )
    keywords = [word for group in groups for word in group.split("|")]
    keywords += [",", "and", "."]
    return keywords
utils/.DS_Store ADDED
Binary file (6.15 kB). View file
 
utils/__pycache__/config.cpython-310.pyc ADDED
Binary file (329 Bytes). View file
 
utils/__pycache__/geoutil.cpython-310.pyc ADDED
Binary file (6.4 kB). View file
 
utils/__pycache__/geoutil.cpython-39.pyc ADDED
Binary file (5.95 kB). View file
 
utils/__pycache__/llm_coding.cpython-310.pyc ADDED
Binary file (12 kB). View file
 
utils/__pycache__/llm_ent_extract.cpython-310.pyc ADDED
Binary file (6.78 kB). View file
 
utils/config.py ADDED
@@ -0,0 +1 @@
 
 
1
import os

# The OpenAI API key is read from the OPENAI_API_KEY environment variable so
# that the secret never lives in version control. Any key that was previously
# committed here must be treated as leaked and revoked with the provider.
api_key = os.environ.get("OPENAI_API_KEY", "")
utils/geoutil.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Aug 2 12:45:21 2022
5
+
6
+ @author: syed
7
+ """
8
+
9
+ import math
10
+ import re
11
+ import regex_spatial
12
+ import quantities as pq
13
+ from math import radians, cos, sin, asin, sqrt
14
+ import quantities as pq
15
+
16
+
17
+
18
def get_kilometers(d, unit):
    """Convert the distance ``d`` given in ``unit`` to kilometres.

    :param d: numeric value (anything ``float()`` accepts).
    :param unit: unit name understood by ``quantities.CompoundUnit``.
    :return: the magnitude of the quantity after its units are set to km.
    """
    amount = float(d)
    length = amount * pq.CompoundUnit(unit)
    length.units = pq.km  # assigning units rescales the quantity in place
    return length.magnitude
22
+
23
def ConvertToRadian(input):
    """Convert an angle from degrees to radians."""
    scaled = input * math.pi
    return scaled / 180
25
+
26
def get_level1(ent):
    """Return the first level-1 keyword found in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level1_regex(), ent)
    return found.group() if found is not None else None
31
def get_level2(ent):
    """Return the first level-2 keyword found in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level2_regex(), ent)
    return found.group() if found is not None else None
36
def get_level3(ent):
    """Return the first level-3 (number + unit) match in the string ``ent``, or None."""
    found = re.search(regex_spatial.get_level3_regex(), ent)
    return found.group() if found is not None else None
41
+
42
def get_ase(ent):
    """Return the part of ``ent`` after its last underscore (all of it if none)."""
    return ent.rsplit("_", 1)[-1]
45
+
46
def get_ent(ent):
    """Split an entity id string into ``(ase, level1, level2, level3)``.

    ``ase`` is the text after the last underscore; each level is the first
    match of the corresponding pattern, or None.
    """
    ase = get_ase(ent)
    level_1 = get_level1(ent)
    level_2 = get_level2(ent)
    level_3 = get_level3(ent)
    return ase, level_1, level_2, level_3
48
+
49
def get_centroid(coordinates, centroid, mini, maxi):
    """Return the midpoint between ``centroid`` and the point nearest the mean angle.

    The mean of ``mini`` and ``maxi`` is compared with the third component of
    every entry of ``coordinates``; the first entry with the smallest absolute
    difference is paired with ``centroid`` in ``midpoint``.
    """
    average = (mini + maxi) / 2
    deviations = [abs(point[2] - average) for point in coordinates]
    nearest = deviations.index(min(deviations))
    target = coordinates[nearest]
    return midpoint(centroid[0], centroid[1], target[0], target[1], average)
59
+
60
+
61
def calculateArea(coordinates):
    """Approximate the area enclosed by a ring of ``(lon, lat)`` points.

    Sums ``radians(lon2 - lon1) * (2 + sin(lat1) + sin(lat2))`` over
    consecutive point pairs, scales by the squared equatorial radius
    (6378137 m) and divides by 1e6. The second sine term uses the latitude
    of the second point; reading its longitude there makes the contributions
    of a rectangle cancel out.

    :param coordinates: sequence of points indexable as ``p[0]`` (lon) and
        ``p[1]`` (lat), in degrees.
    :return: ``abs(round(area, 2)) + 2``.
    """
    # NOTE(review): the scaling has no division by 2 and the result carries a
    # constant "+ 2"; both are kept as found -- confirm they are intended.
    # NOTE(review): nesting was reconstructed from a de-indented listing; the
    # scaling and the final adjustment are assumed to sit at function level.
    area = 0
    if len(coordinates) > 2:
        for p1, p2 in zip(coordinates, coordinates[1:]):
            area += math.radians(p2[0] - p1[0]) * (
                2 + math.sin(math.radians(p1[1])) + math.sin(math.radians(p2[1]))
            )

    area = area * 6378137 * 6378137 / 1000000

    return abs(round(area, 2)) + 2
76
+
77
def get_midmid_point(centroid, point1, point2, is_midmid):
    """Return points lying between ``centroid`` and each of the two given points.

    ``mid`` is the midpoint of centroid and the point; ``midmid`` is the
    midpoint of centroid and ``mid``. Both pairs are always computed.

    :param centroid: indexable with at least two components.
    :param point1: indexable with three components (third is passed through
        ``midpoint`` as its ``angle`` argument).
    :param point2: same shape as ``point1``.
    :param is_midmid: truthy -> return the ``midmid`` pair, else the ``mid`` pair.
    """
    cx, cy = centroid[0], centroid[1]

    def halfway(target):
        # Midpoint between the centroid and ``target``.
        return midpoint(cx, cy, target[0], target[1], target[2])

    mid1 = halfway(point1)
    mid2 = halfway(point2)
    midmid1 = halfway(mid1)
    midmid2 = halfway(mid2)

    if is_midmid:
        return midmid1, midmid2
    return mid1, mid2
94
+
95
def getPointByDistanceAngle(lat, ln, angle, distanceInKm):
    """Return the point reached from ``(lat, ln)`` along bearing ``angle``.

    NOTE: a later definition with the same name and an extra ``unit``
    parameter follows in this module and replaces this one at import time.

    :param lat: start latitude in degrees.
    :param ln: start longitude in degrees.
    :param angle: bearing in degrees.
    :param distanceInKm: distance to travel, in kilometres.
    :return: ``[lon2, lat2, angle]`` with the coordinates in degrees.
    """
    earth_radius_km = 6378.1
    bearing = angle * math.pi / 180
    angular = distanceInKm / earth_radius_km  # distance as an angle on the sphere

    phi1 = math.radians(lat)
    lam1 = math.radians(ln)

    phi2 = math.asin(
        math.sin(phi1) * math.cos(angular)
        + math.cos(phi1) * math.sin(angular) * math.cos(bearing)
    )
    lam2 = lam1 + math.atan2(
        math.sin(bearing) * math.sin(angular) * math.cos(phi1),
        math.cos(angular) - math.sin(phi1) * math.sin(phi2),
    )

    return [math.degrees(lam2), math.degrees(phi2), angle]
117
+
118
def midpoint(x1, y1, x2, y2, angle):
    """Great-circle midpoint of two points given as (lat, lon) pairs.

    NOTE: a later definition of ``midpoint`` in this module takes the
    coordinates in (lon, lat) order and replaces this one at import time.

    :param x1: latitude of the first point, degrees.
    :param y1: longitude of the first point, degrees.
    :param x2: latitude of the second point, degrees.
    :param y2: longitude of the second point, degrees.
    :param angle: passed through unchanged as the third result component.
    :return: ``[latitude, longitude, angle]`` rounded to 8 decimals.
    """
    lam_a, lam_b = math.radians(y1), math.radians(y2)
    phi_a, phi_b = math.radians(x1), math.radians(x2)

    d_lam = lam_b - lam_a
    bx = math.cos(phi_b) * math.cos(d_lam)
    by = math.cos(phi_b) * math.sin(d_lam)

    phi_c = math.atan2(
        math.sin(phi_a) + math.sin(phi_b),
        math.sqrt((math.cos(phi_a) + bx) * (math.cos(phi_a) + bx) + by * by),
    )
    lam_c = lam_a + math.atan2(by, math.cos(phi_a) + bx)
    # Wrap the longitude into [-pi, pi).
    lam_c = (lam_c + 3 * math.pi) % (2 * math.pi) - math.pi

    return [round(math.degrees(phi_c), 8), round(math.degrees(lam_c), 8), angle]
137
+
138
def midpoint(y1, x1, y2, x2, angle):
    """Great-circle midpoint of two points given as (lon, lat) pairs.

    The leftover debug ``print`` was removed so that the helper no longer
    writes to stdout on every call.

    :param y1: longitude of the first point, degrees.
    :param x1: latitude of the first point, degrees.
    :param y2: longitude of the second point, degrees.
    :param x2: latitude of the second point, degrees.
    :param angle: passed through unchanged as the third result component.
    :return: ``[longitude, latitude, angle]`` rounded to 8 decimals.
    """
    lonA = math.radians(y1)
    lonB = math.radians(y2)
    latA = math.radians(x1)
    latB = math.radians(x2)

    dLon = lonB - lonA

    Bx = math.cos(latB) * math.cos(dLon)
    By = math.cos(latB) * math.sin(dLon)

    latC = math.atan2(
        math.sin(latA) + math.sin(latB),
        math.sqrt((math.cos(latA) + Bx) * (math.cos(latA) + Bx) + By * By),
    )
    lonC = lonA + math.atan2(By, math.cos(latA) + Bx)
    # Wrap the longitude into [-pi, pi).
    lonC = (lonC + 3 * math.pi) % (2 * math.pi) - math.pi

    latitude = round(math.degrees(latC), 8)
    longitude = round(math.degrees(lonC), 8)
    return [longitude, latitude, angle]
160
+
161
def calculate_bearing(pointA, pointB):
    """Return the compass bearing in degrees (0-360) from ``pointA`` to ``pointB``.

    Index 0 of each point is treated as latitude and index 1 as longitude.
    The sentinel ``400`` is returned when either point is not exactly a
    ``tuple`` or when ``pointB[0]`` is not exactly a ``float``.
    """
    # NOTE(review): the original guard tested pointB[0] twice; pointA[0] is
    # never type-checked. Behaviour is kept as found -- confirm the intent.
    if type(pointA) is not tuple or type(pointB) is not tuple:
        return 400
    if type(pointB[0]) is not float:
        return 400

    phi1 = math.radians(pointA[0])
    phi2 = math.radians(pointB[0])
    d_lam = math.radians(pointB[1] - pointA[1])

    x = math.sin(d_lam) * math.cos(phi2)
    y = math.cos(phi1) * math.sin(phi2) - (
        math.sin(phi1) * math.cos(phi2) * math.cos(d_lam)
    )

    # atan2 yields (-180, 180]; shift into the compass range [0, 360).
    heading = math.degrees(math.atan2(x, y))
    return (heading + 360) % 360
184
+
185
def getPointByDistanceAngle(lat, ln, angle, distance, unit):
    """Return the point reached from ``(lat, ln)`` along bearing ``angle``.

    :param lat: start latitude in degrees.
    :param ln: start longitude in degrees.
    :param angle: bearing in degrees (anything ``float()`` accepts).
    :param distance: distance value, converted to km via ``get_kilometers``.
    :param unit: unit name of ``distance``.
    :return: ``(lon2, lat2, angle)`` with coordinates rounded to 8 decimals.
    """
    earth_radius_km = 6378.1
    bearing = float(angle) * math.pi / 180
    travelled_km = get_kilometers(distance, unit)

    phi1 = math.radians(lat)
    lam1 = math.radians(ln)

    phi2 = math.asin(
        math.sin(phi1) * math.cos(travelled_km / earth_radius_km)
        + math.cos(phi1) * math.sin(travelled_km / earth_radius_km) * math.cos(bearing)
    )
    lam2 = lam1 + math.atan2(
        math.sin(bearing) * math.sin(travelled_km / earth_radius_km) * math.cos(phi1),
        math.cos(travelled_km / earth_radius_km) - math.sin(phi1) * math.sin(phi2),
    )

    lat_deg = math.degrees(phi2)
    lon_deg = math.degrees(lam2)
    return (round(lon_deg, 8), round(lat_deg, 8), angle)
206
+
207
+
208
def calculatePointByDistance(lat, ln, angle, distance, unit):
    """Offset ``(lat, ln)`` by a scaled distance along ``angle`` on a flat plane.

    The distance is converted to km, multiplied by ``100 / (6378 * 1.56)``
    and split into latitude/longitude offsets with ``cos`` / ``sin``.

    :return: ``(lon, lat, angle)`` with coordinates rounded to 8 decimals.
    """
    # NOTE(review): ``angle`` goes into cos/sin without a degree->radian
    # conversion (the conversion was commented out in the original) --
    # confirm the expected unit of ``angle``.
    scale = 100 / (6378 * 1.56)
    offset = get_kilometers(distance, unit) * scale

    lat_new = lat + (offset * math.cos(angle))
    ln_new = ln + (offset * math.sin(angle))

    return (round(ln_new, 8), round(lat_new, 8), angle)
220
+
221
+
222
+
223
def pointByAngle(lat, ln, angle, distance, unit):
    """Return the unrounded point reached from ``(lat, ln)`` along bearing ``angle``.

    :param lat: start latitude in degrees.
    :param ln: start longitude in degrees.
    :param angle: bearing in degrees.
    :param distance: distance value, converted to km via ``get_kilometers``.
    :param unit: unit name of ``distance``.
    :return: ``(lon2, lat2, angle)`` with coordinates in degrees.
    """
    earth_radius_km = 6378.1
    bearing = angle * math.pi / 180
    travelled_km = get_kilometers(distance, unit)

    phi1 = math.radians(lat)
    lam1 = math.radians(ln)

    phi2 = math.asin(
        math.sin(phi1) * math.cos(travelled_km / earth_radius_km)
        + math.cos(phi1) * math.sin(travelled_km / earth_radius_km) * math.cos(bearing)
    )
    lam2 = lam1 + math.atan2(
        math.sin(bearing) * math.sin(travelled_km / earth_radius_km) * math.cos(phi1),
        math.cos(travelled_km / earth_radius_km) - math.sin(phi1) * math.sin(phi2),
    )

    return (math.degrees(lam2), math.degrees(phi2), angle)
245
+
246
+
247
def getPointByDistance(lat, ln, angle, distance, unit):
    """Shift ``(lat, ln)`` by ``distance`` using a flat degrees-per-km approximation.

    The distance in km is divided by 111.32 to obtain a degree offset; the
    longitude offset is additionally divided by ``cos(lat * 0.01745)``.
    ``angle`` does not influence the offset and is only passed through.

    :return: ``(new_lat, new_long, angle)`` with coordinates rounded to 8 decimals.
    """
    coef = get_kilometers(distance, unit) / 111.32
    shifted_lat = lat + coef
    shifted_lon = ln + coef / math.cos(lat * 0.01745)
    return (round(shifted_lat, 8), round(shifted_lon, 8), angle)
253
+
254
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in km between two points given in degrees.

    Uses the haversine formula with an Earth radius of 6371 km.
    """
    lam1, phi1, lam2, phi2 = (radians(value) for value in (lon1, lat1, lon2, lat2))
    d_lam = lam2 - lam1
    d_phi = phi2 - phi1
    chord = sin(d_phi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(d_lam / 2) ** 2
    central_angle = 2 * asin(sqrt(chord))
    return 6371 * central_angle
262
+
utils/llm_coding.py ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import re

import geopandas as gpd
import numpy as np
import requests
import urllib3
from openai import OpenAI
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping

import regex_spatial
from geocoder import geo_level1
from utils import geoutil
from utils.config import api_key
12
+
13
# Shared OpenAI client, created once at import time from the configured key.
client = OpenAI(api_key=api_key)

# Model identifier string.
model = "gpt-4o"

# Accepted spellings for each compass direction and for "centre of a place".
north = ["north", "N'", "North", "NORTH"]
south = ["south", "S'", "South", "SOUTH"]
east = ["east", "E'", "East", "EAST"]
west = ["west", "W'", "West", "WEST"]
northeast = [
    "north-east", "NE'", "north east",
    "NORTH-EAST", "North East", "NORTH EAST",
]
southeast = [
    "south-east", "SE'", "south east",
    "SOUTH-EAST", "South East", "SOUTH EAST",
]
northwest = [
    "north-west", "NW'", "north west",
    "NORTH-WEST", "North West", "NORTH WEST",
]
southwest = [
    "south-west", "SW'", "south west",
    "SOUTH-WEST", "South West", "SOUTH WEST",
]
center = ["center", "central", "downtown", "midtown"]
29
+ #
30
+ #
31
+ # def get_directional_coordinates(coordinates, direction, centroid, minimum, maximum, is_midmid):
32
+ # direction_coordinates = get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum)
33
+ # midmid1, midmid2 = geoutil.get_midmid_point(centroid, direction_coordinates[0], direction_coordinates[-1],
34
+ # is_midmid)
35
+ # if direction in west:
36
+ # maxi = max(p[2] for p in direction_coordinates)
37
+ # mini = min(p[2] for p in direction_coordinates)
38
+ # index_mini = 0
39
+ # index_maxi = 0
40
+ # for idx, p in enumerate(direction_coordinates):
41
+ # if p[2] == mini:
42
+ # index_mini = idx
43
+ # if p[2] == maxi:
44
+ # index_maxi = idx
45
+ #
46
+ # direction_coordinates.insert(index_maxi + 1, midmid2)
47
+ # direction_coordinates.insert(index_mini + 1, midmid1)
48
+ # else:
49
+ # direction_coordinates.append(midmid2)
50
+ # direction_coordinates.append(midmid1)
51
+ #
52
+ # return direction_coordinates, midmid1, midmid2
53
+ #
54
+ #
55
+ # def get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum):
56
+ # direction_coordinates = []
57
+ # for p in coordinates:
58
+ # if direction in east:
59
+ # if p[2] >= minimum or p[2] <= maximum:
60
+ # direction_coordinates.append(p)
61
+ #
62
+ # else:
63
+ # if p[2] >= minimum and p[2] <= maximum:
64
+ # direction_coordinates.append(p)
65
+ # return direction_coordinates
66
+ #
67
+ #
68
+ # def get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum):
69
+ # direction_coordinates = []
70
+ # for p in coordinates:
71
+ # if direction in east:
72
+ # if p[2] >= minimum or p[2] <= maximum:
73
+ # direction_coordinates.append(p)
74
+ #
75
+ # else:
76
+ # if p[2] >= minimum and p[2] <= maximum:
77
+ # direction_coordinates.append(p)
78
+ # return direction_coordinates
79
+ #
80
+ #
81
+ # def get_central(coordinates, centroid, direction, is_midmid):
82
+ # n_min_max = get_min_max("north")
83
+ # n_coordinates = get_directional_coordinates_by_angle(coordinates, "north", n_min_max[0], n_min_max[1])
84
+ # n_mid1, n_mid2 = geoutil.get_midmid_point(centroid, n_coordinates[0], n_coordinates[-1], is_midmid)
85
+ #
86
+ # ne_min_max = get_min_max("north east")
87
+ # ne_coordinates = get_directional_coordinates_by_angle(coordinates, "north east", ne_min_max[0], ne_min_max[1])
88
+ # ne_mid1, ne_mid2 = geoutil.get_midmid_point(centroid, ne_coordinates[0], ne_coordinates[-1], is_midmid)
89
+ #
90
+ # e_min_max = get_min_max("east")
91
+ # e_coordinates = get_directional_coordinates_by_angle(coordinates, "east", e_min_max[0], e_min_max[1])
92
+ # e_mid1, e_mid2 = geoutil.get_midmid_point(centroid, e_coordinates[0], e_coordinates[-1], is_midmid)
93
+ #
94
+ # se_min_max = get_min_max("south east")
95
+ # se_coordinates = get_directional_coordinates_by_angle(coordinates, "south east", se_min_max[0], se_min_max[1])
96
+ # se_mid1, se_mid2 = geoutil.get_midmid_point(centroid, se_coordinates[0], se_coordinates[-1], is_midmid)
97
+ #
98
+ # s_min_max = get_min_max("south")
99
+ # s_coordinates = get_directional_coordinates_by_angle(coordinates, "south", s_min_max[0], s_min_max[1])
100
+ # s_mid1, s_mid2 = geoutil.get_midmid_point(centroid, s_coordinates[0], s_coordinates[-1], is_midmid)
101
+ #
102
+ # sw_min_max = get_min_max("south west")
103
+ # sw_coordinates = get_directional_coordinates_by_angle(coordinates, "south west", sw_min_max[0], sw_min_max[1])
104
+ # sw_mid1, sw_mid2 = geoutil.get_midmid_point(centroid, sw_coordinates[0], sw_coordinates[-1], is_midmid)
105
+ #
106
+ # w_min_max = get_min_max("west")
107
+ # w_coordinates = get_directional_coordinates_by_angle(coordinates, "west", w_min_max[0], w_min_max[1])
108
+ # w_mid1, w_mid2 = geoutil.get_midmid_point(centroid, w_coordinates[0], w_coordinates[-1], is_midmid)
109
+ #
110
+ # nw_min_max = get_min_max("north west")
111
+ # nw_coordinates = get_directional_coordinates_by_angle(coordinates, "north west", nw_min_max[0], nw_min_max[1])
112
+ # nw_mid1, nw_mid2 = geoutil.get_midmid_point(centroid, nw_coordinates[0], nw_coordinates[-1], is_midmid)
113
+ #
114
+ # central_coordindates = [e_mid1, e_mid2, ne_mid1, ne_mid2, n_mid1, n_mid2,
115
+ # nw_mid1, nw_mid2, w_mid1, w_mid2, sw_mid1, sw_mid2,
116
+ # s_mid1, s_mid2, se_mid1, se_mid2]
117
+ # return central_coordindates
118
+ #
119
+ #
120
+ # def get_min_max(direction):
121
+ # regex = regex_spatial.get_directional_regex()
122
+ # direction_list = regex.split("|")
123
+ # if direction in direction_list:
124
+ # if direction in east:
125
+ # return (337, 22)
126
+ # if direction in northeast:
127
+ # return (22, 67)
128
+ # if direction in north:
129
+ # return (67, 112)
130
+ # if direction in northwest:
131
+ # return (112, 157)
132
+ # if direction in west:
133
+ # return (157, 202)
134
+ # if direction in southwest:
135
+ # return (202, 247)
136
+ # if direction in south:
137
+ # return (247, 292)
138
+ # if direction in southeast:
139
+ # return (292, 337)
140
+ #
141
+ # return None
142
+ # def get_level1_coordinates(coordinates, centroid, direction, is_midmid):
143
+ # min_max = get_min_max(direction)
144
+ # if min_max is not None:
145
+ # coordinates, mid1, mid2 = get_directional_coordinates(coordinates, direction, centroid, min_max[0], min_max[1], is_midmid)
146
+ # return coordinates, centroid, mid1, mid2
147
+ # elif direction.lower() in center:
148
+ # return get_central(coordinates, centroid, direction, is_midmid), centroid, None, None
149
+ # else:
150
+ # return coordinates, centroid, None, None
151
def to_standard_2d_list(data):
    """Flatten ``data`` and regroup its elements into a list of 2-element lists.

    Relies on the module-level ``import numpy as np``.

    :param data: array-like of any nesting depth that numpy can convert.
    :return: list of ``[a, b]`` pairs in flattened order.
    :raises ValueError: when the total number of elements is odd.
    """
    flat = np.asarray(data).ravel()
    # The flattened data can only be paired up when its length is even.
    if flat.size % 2 != 0:
        raise ValueError("元素个数不是2的倍数,不能 reshape 成 [N, 2] 格式")

    return flat.reshape(-1, 2).tolist()
160
+
161
+
162
def get_geojson(ent, arr, centroid):
    """Wrap a coordinate ring in a single-feature GeoJSON FeatureCollection.

    :param ent: value stored as the feature ``id``.
    :param arr: coordinate ring; stored as the only ring of a Polygon.
    :param centroid: value stored under ``properties.centroid``.
    :return: dict with keys ``type`` and ``features``.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
180
+
181
+
182
def get_coordinates(ent):
    """Fetch the first ring and centroid of ``ent`` from Nominatim.

    NOTE: a later definition of ``get_coordinates`` in this module replaces
    this one at import time.

    Every vertex of the returned ring is extended in place with its bearing
    from the centroid, as computed by ``geoutil.calculate_bearing``.

    :param ent: place name, concatenated into the query string as-is.
    :return: ``(ring, centroid)`` where ``centroid`` is ``(lon, lat)``.
    """
    request_url = 'https://nominatim.openstreetmap.org/search.php?q= ' +ent +'&polygon_geojson=1&accept-language=en&format=jsonv2'
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    page = requests.get(request_url, headers=headers, verify=False)

    top_hit = json.loads(page.content)[0]
    ring = top_hit['geojson']['coordinates'][0]
    centroid = (float(top_hit['lon']), float(top_hit['lat']))

    for vertex in ring:
        bearing = geoutil.calculate_bearing(centroid, (vertex[0], vertex[1]))
        vertex.append(bearing)

    return ring, centroid
199
+
200
def get_coordinates(location):
    """Look up ``location`` on Nominatim and return its geometry and centroid.

    The query is passed through ``params`` so that it is URL-encoded, and the
    request has a timeout so that a stalled connection cannot hang the caller.

    :param location: free-text place name.
    :return: ``(coordinates, centroid)``. For a Polygon result
        ``coordinates`` is its first ring; for a Point result it is the
        point's coordinate pair. ``centroid`` is ``(lon, lat)`` as floats.
    :raises IndexError: when Nominatim returns no result.
    :raises ValueError: when the first result's geometry is neither Polygon
        nor Point.
    """
    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept as found -- confirm whether it is still required.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        verify=False,
        timeout=30,
    )
    results = json.loads(response.content)
    if not results:
        raise IndexError(f"Nominatim returned no result for {location!r}")

    top_hit = results[0]
    geometry = top_hit['geojson']
    geometry_type = geometry['type']
    if geometry_type == 'Polygon':
        coordinates = geometry['coordinates'][0]
    elif geometry_type == 'Point':
        coordinates = geometry['coordinates']
    else:
        # Previously this fell through and failed on an unbound local.
        raise ValueError(
            f"Unsupported geometry type {geometry_type!r} for {location!r}"
        )

    centroid = (float(top_hit['lon']), float(top_hit['lat']))
    return (coordinates, centroid)
216
+
217
+
218
+ # level3
219
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in the given sector.

    For directions listed in ``geo_level1.east`` the sector wraps around, so
    a point is kept when its bearing is ``>= minimum`` OR ``<= maximum``;
    for every other direction both bounds must hold.

    :return: list of ``(p[0], p[1], bearing)`` tuples, in input order.
    """
    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        tagged = (point[0], point[1], bearing)
        if direction in geo_level1.east:
            keep = bearing >= minimum or bearing <= maximum
        else:
            keep = bearing >= minimum and bearing <= maximum
        if keep:
            selected.append(tagged)

    return selected
239
def get_level3(level3):
    """Split a "number + unit" string into ``(digits, unit)``.

    ``digits`` is the first run of decimal digits and ``unit`` the first run
    of ASCII letters, both returned as strings. Raises ``IndexError`` when
    either run is missing.
    """
    digit_runs = re.findall('[0-9]+', level3)
    first_digits = digit_runs[0]
    letter_runs = re.findall('[A-Za-z]+', level3)
    first_unit = letter_runs[0]
    return first_digits, first_unit
243
+
244
def get_direction_coordinates(coordinates, centroid, level1):
    """Filter ``coordinates`` to the sector that ``geo_level1`` assigns to ``level1``.

    When ``geo_level1.get_min_max`` returns None the coordinates are returned
    unchanged.
    """
    bounds = geo_level1.get_min_max(level1)
    if bounds is None:
        return coordinates
    lower, upper = bounds[0], bounds[1]
    return get_directional_coordinates_by_angle(coordinates, centroid, level1, lower, upper)
250
def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing is between 157 and 202.

    Each geometry is converted with ``shapely.geometry.mapping``; the rings of
    its first feature are reversed and concatenated, and every point whose
    bearing from ``centroid`` lies in ``[157, 202]`` is kept.

    :return: list of ``(p[0], p[1], bearing)`` tuples, points of ``poly1``
        first, then those of ``poly2``.
    """
    # NOTE(review): loop nesting was reconstructed from a de-indented
    # listing -- confirm against the original file.

    def reversed_points(geometry):
        # Flatten all rings of the first feature, each ring reversed.
        rings = mapping(geometry)["features"][0]["geometry"]["coordinates"]
        points = []
        for ring in rings:
            points.extend(list(ring[::-1]))
        return points

    def western(points):
        # Keep only the points inside the [157, 202] bearing sector.
        kept = []
        for point in points:
            bearing = geoutil.calculate_bearing(centroid, point)
            if bearing >= 157 and bearing <= 202:
                kept.append((point[0], point[1], bearing))
        return kept

    first_points = reversed_points(poly1)
    second_points = reversed_points(poly2)

    result = western(first_points)
    result.extend(western(second_points))
    return result
274
+
275
+
276
def get_level3_coordinates(coordinates, level_3, level1):
    """Compute the area located a given distance (and direction) from a place.

    :param coordinates: ``(boundary, center)`` pair; ``boundary`` is a list of
        points and ``center`` is indexed as ``center[0]`` / ``center[1]``
    :param level_3: distance expression such as ``"4 km"``
    :param level1: direction keyword, or ``None`` for no direction
    :return: ``(points, center)`` for the resulting area

    When ``boundary`` is not a list of at least three points the input is
    treated as a single point: the centre is shifted by the distance along
    east/west/north/south and a fixed 1 km circle is drawn around it.  Any
    other direction value leaves the centre unshifted.  Otherwise a ring is
    built around the polygon by buffering it twice and taking the difference.
    """
    distance, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(distance, unit)

    coords0, center = coordinates

    has_polygon = isinstance(coords0, list) and len(coords0) >= 3
    if not has_polygon:
        # Point input: conversion factors between kilometres and degrees.
        km_per_deg_lat = 111.32
        km_per_deg_lon = 111.32 * np.cos(np.radians(center[1]))

        shift_lon = shift_lat = 0
        if level1 is not None:
            if level1 in geo_level1.east:
                shift_lon = kms / km_per_deg_lon
            elif level1 in geo_level1.west:
                shift_lon = -kms / km_per_deg_lon
            elif level1 in geo_level1.north:
                shift_lat = kms / km_per_deg_lat
            elif level1 in geo_level1.south:
                shift_lat = -kms / km_per_deg_lat
            # Compound directions (northeast, southwest, ...) are not handled.

        moved_center = (center[0] + shift_lon, center[1] + shift_lat)

        # Circle of fixed 1 km radius around the shifted centre.
        radius_km = 1
        ring = []
        for degrees in np.linspace(0, 360, num=100):
            radians = np.radians(degrees)
            offset_lat = (np.sin(radians) * radius_km) / km_per_deg_lat
            offset_lon = (np.cos(radians) * radius_km) / km_per_deg_lon
            ring.append((moved_center[0] + offset_lon, moved_center[1] + offset_lat))

        # Report the centroid of the generated ring as the new centre.
        if ring:
            ring_centroid = MultiPoint(ring).centroid
            center = (ring_centroid.x, ring_centroid.y)
        else:
            center = moved_center

        return ring, center

    # Regular polygon flow: build a band around the polygon.
    base_series = gpd.GeoSeries(Polygon(coords0))
    poly2 = base_series.buffer(0.0095 * kms, join_style=2)
    poly3 = base_series.buffer(0.013 * kms, join_style=2)
    band = poly3.difference(poly2)

    # Flatten the band's rings (each in reversed vertex order).
    coord = []
    band_rings = mapping(band)["features"][0]["geometry"]["coordinates"]
    for band_ring in band_rings:
        coord.extend(list(band_ring[::-1]))

    # Clip to the requested direction.
    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            coord = sort_west(poly3, poly2, coordinates[1])

    # Centroid of the result, falling back to the input centre when empty.
    if coord:
        result_centroid = MultiPoint(coord).centroid
        center = (result_centroid.x, result_centroid.y)
    else:
        center = coordinates[1]

    return coord, center
352
+ # level 3 end
353
+
354
+ # between
355
def get_between_coordinates(coordinates1, coordinates2, default_radius_km=2, num_points=100):
    """Build a circular area centred midway between two regions.

    The circle is centred on the midpoint of the two input centres.  Its
    radius is derived from the average of the two polygon areas; an input
    with fewer than three boundary points counts as a point with area 0.
    When both inputs are points, ``default_radius_km`` is used instead.

    :param coordinates1: ``(boundary, center)`` of the first region
    :param coordinates2: ``(boundary, center)`` of the second region
    :param default_radius_km: radius in km used when both inputs are points
        (default 2)
    :param num_points: number of points generated on the circle (default 100)
    :return: ``(circle_points, midpoint)`` where ``circle_points`` is a list
        of ``(lon, lat)`` tuples and ``midpoint`` is the circle centre
    """

    def is_valid_polygon(coords):
        return isinstance(coords, list) and len(coords) >= 3

    coords1, center1 = coordinates1
    coords2, center2 = coordinates2

    # Inputs with fewer than three points contribute no area.
    area1 = Polygon(coords1).area if is_valid_polygon(coords1) else 0
    area2 = Polygon(coords2).area if is_valid_polygon(coords2) else 0

    # Centre of the result: midpoint of the two input centres.
    midpoint = (
        (center1[0] + center2[0]) / 2,
        (center1[1] + center2[1]) / 2
    )

    if area1 == 0 and area2 == 0:
        r_km = default_radius_km
    else:
        avg_area = (area1 + area2) / 2
        # Radius of the equal-area circle, converted to approximate km.
        r_km = np.sqrt(avg_area / np.pi) * 111.32

    # Conversion factors between kilometres and degrees at the midpoint.
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    # Sample the circle; 0 and 360 degrees are both included.
    circle_points = []
    for theta in np.linspace(0, 360, num=num_points):
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        circle_points.append((midpoint[0] + d_lon, midpoint[1] + d_lat))

    return circle_points, midpoint
410
+ # between end
411
+
412
+
413
# Chinese-language variant of the planning prompt.
_LLMAPI_SYSTEM_PROMPT_ZH = (
    "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
    "你能选择的定位函数有:\n"
    "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
    "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
    "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
    "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
    "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
    "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
    "示例输出:\n"
    "<<<JSON>>>\n"
    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
    "<<<END>>>")

# English-language variant of the planning prompt (the default).
_LLMAPI_SYSTEM_PROMPT_EN = (
    "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
    "The positioning functions you can choose from are:\n"
    "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
    "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
    "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
    "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
    "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
    "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
    "All directions must be in English (e.g., south, west, northeast, etc.).\n"
    "Example output:\n"
    "<<<JSON>>>\n"
    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
    "<<<END>>>")


def llmapi(text, prompt_language="en"):
    """Ask the chat model to turn a location description into positioning steps.

    The model is instructed to reason first and then emit a JSON list of
    steps between ``<<<JSON>>>`` and ``<<<END>>>`` markers; that list is
    parsed and returned.  The raw reply is printed before parsing.

    :param text: natural-language description sent as the user message
    :param prompt_language: ``"en"`` (default) for the English system prompt,
        ``"zh"`` for the Chinese one
    :return: the decoded JSON value found between the markers (per the
        prompt, a list of ``{"id", "function", "inputs"}`` dicts)
    :raises ValueError: if ``prompt_language`` is unsupported, if the reply
        contains no marker-delimited block, or if the block is not valid
        JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass)
    """
    if prompt_language == "en":
        system_prompt = _LLMAPI_SYSTEM_PROMPT_EN
    elif prompt_language == "zh":
        system_prompt = _LLMAPI_SYSTEM_PROMPT_ZH
    else:
        raise ValueError(f"Unsupported prompt_language: {prompt_language!r}")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    # The API may return no text content; treat that as an empty reply so the
    # failure surfaces as the ValueError below rather than a TypeError.
    result = chat_completion.choices[0].message.content or ""
    print(result)

    # Prefer the exact layout shown in the prompt.  Only when that is absent,
    # accept other whitespace (or none) around the markers.
    json_match = (
        re.search(r'<<<JSON>>>\n(.*?)\n<<<END>>>', result, re.DOTALL)
        or re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)
    )
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")

    return json.loads(json_match.group(1))
486
+
487
+
488
+
489
+
490
+
491
def execute_steps(steps):
    """Run a list of positioning steps and return every step's result.

    Each step is a dict with ``'id'``, ``'function'`` (``"Relative"`` or
    ``"Between"``) and ``'inputs'``.  An integer input refers to the result
    of the earlier step with that id; a string location is geocoded with
    ``get_coordinates``.

    :param steps: iterable of step dicts, in execution order
    :return: dict mapping each executed step id to its
        ``(coordinates, center)`` result
    :raises KeyError: if a step lacks a required key or refers to a step id
        that has not produced a result

    Steps whose ``'function'`` is neither ``"Relative"`` nor ``"Between"``
    are ignored and produce no entry.
    """

    def _resolve_location(value):
        # Place names are geocoded and their boundary normalised; results of
        # earlier steps are passed through as they are.
        # NOTE(review): the original indentation was lost; normalisation is
        # assumed to apply only to freshly geocoded names -- confirm.
        if not isinstance(value, str):
            return value
        geocoded = get_coordinates(value)
        return [to_standard_2d_list(geocoded[0])] + list(geocoded[1:])

    data = {}

    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        # Replace integer references with the results of earlier steps.
        resolved_inputs = [
            data[inp] if isinstance(inp, int) else inp
            for inp in inputs
        ]

        if function == "Relative":
            location, direction, distance = resolved_inputs
            # Use the resolved location itself; no fixed test coordinates are
            # substituted here.
            location = _resolve_location(location)
            data[step_id] = get_level3_coordinates(location, distance, direction)

        elif function == "Between":
            location1, location2 = resolved_inputs
            location1 = _resolve_location(location1)
            location2 = _resolve_location(location2)
            data[step_id] = get_between_coordinates(location1, location2)

    return data
539
+
540
+
541
+
542
if __name__ == '__main__':
    # Manual experiments with the individual helpers:
    # a = get_coordinates('Burwood')
    # a2 = get_coordinates('Glebe')
    # b = get_level3_coordinates(a, '5 km', 'east')
    # c = get_between_coordinates(a, a2)

    # Full pipeline.
    # Sample natural-language inputs:
    # default_input_text = "5 km southwest of the midpoint between the point 4 km south of Chatswood and the point 2 km east of North Sydney."
    # default_input_text = "You are a planner siting a new forest monitoring station in Washington State. The two candidate reference locations are Mount Rainier National Park and North Cascades National Park. First find the midpoint between the two parks; then take the midpoint between that point and North Cascades National Park to determine the final candidate site."
    # default_input_text = "The quarter point between Chatswood and North Sydney that is closer to North Sydney."
    # default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
    # default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."

    # Parse the LLM result (or use one of the canned plans below):
    # parsed_steps = llmapi(default_input_text)
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Chatswood', 'south', '4 km']}, {'id': 2, 'function': 'Relative', 'inputs': ['North Sydney', 'east', '2 km']}, {'id': 3, 'function': 'Between', 'inputs': [1, 2]}, {'id': 4, 'function': 'Relative', 'inputs': [3, 'south west', '5 km']}]
    # parsed_steps = [{"id": 1, "function": "Between", "inputs": ["Chatswood", "North Sydney"]},{"id": 2, "function": "Between", "inputs": [1, "North Sydney"]}]
    # parsed_steps = [{"id": 1, "function": "Relative", "inputs": ["Katoomba", "southeast", "3 km"]}, {"id": 2, "function": "Between", "inputs": [1, "Echo Point"]}]
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Scafell Pike', 'east', '9 km']}]
    # parsed_steps = [{'id': 1, 'function': 'Relative', 'inputs': ['Colosseum', 'northeast', '8 km']}, {'id': 2, 'function': 'Relative', 'inputs': [1, 'northeast', '2 km']}]
    parsed_steps = [
        {"id": 1, "function": "Between", "inputs": ["Statue of Liberty", "Eiffel Tower"]},
        {"id": 2, "function": "Relative", "inputs": [1, "west", "8 km"]},
    ]

    # Execute the steps and print every intermediate result.
    result = execute_steps(parsed_steps)
    print(result)
    print('-' * 100)

    # Print the coordinates produced by the step with the highest id.
    last_step_id = max(result)
    print(result[last_step_id][0])
    # End of pipeline.

    # location = get_coordinates('Chatswood')
    # result = get_level3_coordinates(location, '4 km', 'north west')
    # print(result)
581
+
582
+
utils/llm_ent_extract.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import re
3
+ from utils.config import api_key
4
+
5
# Shared OpenAI client used by every function in this module.
client = OpenAI(
    api_key=api_key
)

# Chat model used for all completions.  ("gpt-3.5-turbo" is the alternative
# that was assigned and then immediately overwritten.)
model = "gpt-4o"
11
+
12
def extract_GPE(text):
    """Ask the chat model to list the geopolitical entities found in ``text``.

    :param text: text sent as the user message
    :return: the model's reply content; the prompt asks for entities wrapped
        as ``[###NAME1###, ###NAME2###]``
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all geopolitical entities from a given text. Geopolitical entities can include countries, regions, cities, autonomous regions, or other administrative divisions. For each geopolitical entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"China and India are two of the most populous countries in Asia."\n\nExpected Output:\n[###China###, ###India###]'''
    # Sample input for manual testing: 'Where is France?'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
29
+
30
+
31
def extract_LOC(text):
    """Ask the chat model to list the physical location entities in ``text``.

    The prompt excludes political or administrative divisions, and its
    worked example tags only physical locations so that it agrees with that
    rule.

    :param text: text sent as the user message
    :return: the model's reply content; the prompt asks for entities wrapped
        as ``[###NAME1###, ###NAME2###]``
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all location entities (LOC) from a given text. Location entities can include physical locations such as landmarks, geographical features, mountains, rivers, oceans, and places, but do not include political or administrative divisions such as countries or cities (these are considered geopolitical entities). For each location entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The Colorado River carves through the Grand Canyon in Arizona, south of the Rocky Mountains."\n\nExpected Output:\n[###Colorado River###, ###Grand Canyon###, ###Rocky Mountains###]'''
    # Sample input for manual testing: 'The Grand Canyon is one of the most
    # spectacular natural wonders in the world, located in the state of
    # Arizona. Nearby, the Colorado River flows through the canyon, carving
    # its way through the rugged terrain. In the north, the Rocky Mountains
    # stretch across several states, including Colorado and Wyoming.'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
48
+
49
+
50
def extract_RSE_1(text):
    """Ask the chat model to list directional keywords found in ``text``.

    :param text: text sent as the user message
    :return: the model's reply content; the prompt asks for keywords wrapped
        as ``[###WORD1###, ###WORD2###]``
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all spatial entities (directional keywords) from a given text. Spatial entities can include directional keywords such as north, south, east, west, and more specific terms like northeast, northwest, southeast, southwest, as well as terms indicating locations like center, central, downtown, and midtown. For each spatial entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The hotel is located in the downtown area of New York, just south of Central Park, with a beautiful view of the southeast corner."\n\nExpected Output:\n[###downtown###, ###south###, ###southeast###]'''
    # Sample input for manual testing: 'The train station is situated in the
    # central part of the city, just north of the river and east of the main
    # square.'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
67
+
68
+
69
def extract_RSE_2(text):
    """Ask the chat model to list fuzzy proximity keywords found in ``text``.

    :param text: text sent as the user message
    :return: the model's reply content; the prompt asks for keywords wrapped
        as ``[###WORD1###, ###WORD2###]``
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy spatial entities (keywords) from a given text. Fuzzy spatial keywords can include terms like nearby, near, vicinity, close, beside, next, adjacent, immediate, border, surrounding, neighbourhood, proximity, territory, locality, and similar terms. For each fuzzy spatial keyword, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located nearby the lake, with several cafes close to the walking paths, and a small garden adjacent to the main entrance."\n\nExpected Output:\n[###nearby###, ###close###, ###adjacent###]'''
    # Sample input for manual testing: 'The village is situated in the
    # vicinity of the mountain range, with a small river flowing beside the
    # houses and several farms next to the road.'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
86
+
87
+
88
def extract_RSE_3(text):
    """Ask the chat model to list distance expressions found in ``text``.

    :param text: text sent as the user message
    :return: the model's reply content; the prompt asks for each number and
        unit to be wrapped as ``[###3 km###, ###500 meters###]``
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy distance keywords from a given text. Fuzzy distance keywords include numeric values followed by distance units such as kilometer, mile, meter, foot, inch, centimeter, and other related units. The distance units can be in different formats, such as km, m, mi, ft, yd, cm, mm, or even in full words like kilometer, mile, or inch. For each fuzzy distance keyword, wrap the entire expression (number and unit) in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located 3 km away from the city center, while the nearest supermarket is only 500 meters from here, and the lake is about 1 mile further down the road."\n\nExpected Output:\n[###3 km###, ###500 meters###, ###1 mile###]'''
    # Sample input for manual testing: 'The school is located approximately
    # 2 miles from the station, while the nearest bus stop is 200 meters
    # away. The hiking trail is about 5 kilometers east of the town center.'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
105
+
106
+
107
def extract(respond, entity_type):
    """Pull the ``###``-wrapped entities out of a reply and label them.

    :param respond: string containing entities, e.g.
        ``'[###2 miles###, ###200 meters###, ###5 kilometers###]'``
    :param entity_type: label assigned to every extracted entity
    :return: dict whose keys are the whitespace-stripped entities and whose
        values are all ``entity_type``
    """
    # Everything enclosed between a pair of ### markers is one entity.
    wrapped = re.findall(r'###(.*?)###', respond)
    return dict.fromkeys((name.strip() for name in wrapped), entity_type)
122
+
123
+
124
def llmapi(text):
    """Send ``text`` to the chat model under a placeholder system prompt.

    The system prompt is a fill-in placeholder (it reads "please fill in the
    prompt you need"), so this is a template to adapt.

    :param text: text sent as the user message
    :return: the model's reply content
    """
    conversation = [
        {"role": "system", "content": '请你填入需要的提示'},
        {"role": "user", "content": text},
    ]

    completion = client.chat.completions.create(
        messages=conversation,
        model=model,
    )

    return completion.choices[0].message.content
139
+
140
+
141
+ # print(extract('[###2 miles###, ###200 meters###, ###5 kilometers###]', 'rse'))
142
+ # print(extract(extract_GPE('Between Burwood and Glebe.'), 'gpe'))
143
+
144
+
145
+