"""Geocoding pipeline: ask an LLM for positioning steps and execute them.

The LLM turns a natural-language location description into a list of
``Relative`` / ``Between`` steps; the helpers below resolve place names via
Nominatim and compute the resulting coordinate sets.
"""
import json
import os
import re

import geopandas as gpd
import numpy as np
import requests
import urllib3
from openai import OpenAI
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping

import regex_spatial
from geocoder import geo_level1
from utils import geoutil

# SECURITY: the API key used to be hard-coded here. It is now read from the
# environment; the key that was committed must be treated as leaked and revoked.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
model = "gpt-4o"

north = ["north", "N'", "North", "NORTH"]
south = ["south", "S'", "South", "SOUTH"]
east = ["east", "E'", "East", "EAST"]
west = ["west", "W'", "West", "WEST"]
northeast = ["north-east", "NE'", "north east", "NORTH-EAST", "North East", "NORTH EAST"]
southeast = ["south-east", "SE'", "south east", "SOUTH-EAST", "South East", "SOUTH EAST"]
northwest = ["north-west", "NW'", "north west", "NORTH-WEST", "North West", "NORTH WEST"]
southwest = ["south-west", "SW'", "south west", "SOUTH-WEST", "South West", "SOUTH WEST"]
center = ["center", "central", "downtown", "midtown"]

dataset_path = 'dataset/dataset_20.json'

NOMINATIM_SEARCH_URL = 'https://nominatim.openstreetmap.org/search.php'
# Seconds to wait for Nominatim before giving up instead of hanging forever.
NOMINATIM_TIMEOUT = 30

# Chinese system prompt. It belonged to an earlier ``llmapi`` definition that
# was shadowed by the English one; kept so it can still be passed explicitly.
SYSTEM_PROMPT_ZH = (
    "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
    "你能选择的定位函数有:\n"
    "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
    "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
    "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<>>` 和 `<<>>` 包裹起来。\n"
    "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
    "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
    "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
    "示例输出:\n"
    "<<>>\n"
    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
    "<<>>")

# English system prompt; this is the one ``llmapi`` uses by default.
SYSTEM_PROMPT_EN = (
    "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
    "The positioning functions you can choose from are:\n"
    "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
    "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
    "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations."
    "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<>>` and `<<>>`.\n"
    "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
    "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
    "All directions must be in English (e.g., south, west, northeast, etc.).\n"
    "Example output:\n"
    "<<>>\n"
    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
    "<<>>")

# Extracts the JSON payload between the two `<<>>` delimiter lines.
# NOTE(review): opening and closing delimiters are identical, so the first
# delimited section wins — confirm this is the intended marker.
_JSON_BLOCK_RE = re.compile(r'<<>>\n(.*?)\n<<>>', re.DOTALL)


def get_geojson(ent, arr, centroid):
    """Wrap one ring in a single-feature GeoJSON FeatureCollection.

    :param ent: value stored as the feature ``id``.
    :param arr: ring of points; stored as the only ring of a ``Polygon``.
    :param centroid: value stored under ``properties['centroid']``.
    :return: the FeatureCollection as a plain dict.
    """
    return {
        'type': 'FeatureCollection',
        'features': [{
            'type': 'Feature',
            'id': ent,
            'properties': {'centroid': centroid},
            'geometry': {'type': 'Polygon', 'coordinates': [arr]},
        }],
    }


def get_coordinates(location):
    """Look up ``location`` on Nominatim and return ``(ring, centroid)``.

    :param location: free-text place name.
    :return: tuple ``(coordinates, centroid)`` where ``coordinates`` is the
        first ring of the first search hit and ``centroid`` is
        ``(lon, lat)`` as floats.
    :raises IndexError: if Nominatim returns no result for ``location``.
    :raises requests.HTTPError: if the HTTP request fails.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # `params` lets requests URL-encode the place name (spaces, '&', non-ASCII).
    query = {
        'q': location,
        'polygon_geojson': 1,
        'accept-language': 'en',
        'format': 'jsonv2',
    }
    # NOTE(review): verify=False disables TLS certificate checking; it is kept
    # from the original code — confirm it is really needed in this environment.
    response = requests.get(
        NOMINATIM_SEARCH_URL,
        params=query,
        headers=headers,
        verify=False,
        timeout=NOMINATIM_TIMEOUT,
    )
    response.raise_for_status()
    results = json.loads(response.content)
    if not results:
        raise IndexError(f"Nominatim returned no result for {location!r}")

    best = results[0]
    geometry = best['geojson']
    coordinates = geometry['coordinates'][0]
    if geometry.get('type') == 'MultiPolygon':
        # A MultiPolygon nests one level deeper: take the first polygon's
        # exterior ring so callers always receive a ring of points.
        coordinates = coordinates[0]
    centroid = (float(best['lon']), float(best['lat']))
    return (coordinates, centroid)


def get_coordinates_with_bearing(ent):
    """Like ``get_coordinates`` but appends a bearing to every ring point.

    This is the earlier ``get_coordinates`` variant, which was shadowed by
    the later definition of the same name. Each point of the ring is
    mutated in place: the bearing from the centroid to the point, as
    computed by ``geoutil.calculate_bearing``, is appended to it.

    :param ent: free-text place name.
    :return: tuple ``(ring, centroid)``.
    """
    ring, centroid = get_coordinates(ent)
    for point in ring:
        point.append(geoutil.calculate_bearing(centroid, (point[0], point[1])))
    return ring, centroid


# level3
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in a sector.

    :param coordinates: iterable of points (indexable as ``p[0]``, ``p[1]``).
    :param centroid: reference point passed to ``geoutil.calculate_bearing``.
    :param direction: direction label; when it is in ``geo_level1.east`` the
        sector is treated as wrapping around (``angle >= minimum`` OR
        ``angle <= maximum``), otherwise both bounds must hold.
    :param minimum: lower bearing bound.
    :param maximum: upper bearing bound.
    :return: list of ``(x, y, angle)`` tuples for the points that were kept.
    """
    wraps_around = direction in geo_level1.east
    selected = []
    for p in coordinates:
        angle = geoutil.calculate_bearing(centroid, p)
        if wraps_around:
            keep = angle >= minimum or angle <= maximum
        else:
            keep = minimum <= angle <= maximum
        if keep:
            selected.append((p[0], p[1], angle))
    return selected


def get_level3(level3):
    """Split a distance string such as ``"4 km"`` into ``(digits, unit)``.

    Both parts are returned as strings: the first run of digits and the
    first run of ASCII letters found in ``level3``.
    NOTE(review): only the first digit run is used, so ``"4.5 km"`` yields
    ``"4"`` — confirm whether fractional distances need to be supported.

    :raises IndexError: if ``level3`` contains no digits or no letters.
    """
    digits = re.findall('[0-9]+', level3)[0]
    unit = re.findall('[A-Za-z]+', level3)[0]
    return digits, unit


def get_direction_coordinates(coordinates, centroid, level1):
    """Filter ``coordinates`` to the sector that ``geo_level1`` gives for ``level1``.

    :return: the filtered ``(x, y, angle)`` list, or ``coordinates``
        unchanged when ``geo_level1.get_min_max`` returns ``None``.
    """
    min_max = geo_level1.get_min_max(level1)
    if min_max is None:
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, min_max[0], min_max[1])


def _flatten_rings(geometry):
    """Return all points of the first feature of ``geometry``, each ring reversed."""
    rings = mapping(geometry)["features"][0]["geometry"]["coordinates"]
    points = []
    for ring in rings:
        points.extend(list(ring[::-1]))
    return points


def _points_in_bearing_range(points, centroid, minimum, maximum):
    """Return ``(x, y, angle)`` for points whose bearing is within the bounds."""
    kept = []
    for p in points:
        angle = geoutil.calculate_bearing(centroid, p)
        if minimum <= angle <= maximum:
            kept.append((p[0], p[1], angle))
    return kept


def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing is within [157, 202].

    Despite the name no sorting happens: the points kept from ``poly1``
    come first, followed by those kept from ``poly2``.

    :param poly1: geometry whose ``mapping`` is a feature collection.
    :param poly2: second geometry of the same kind.
    :param centroid: reference point for the bearing computation.
    :return: list of ``(x, y, angle)`` tuples.
    """
    kept = _points_in_bearing_range(_flatten_rings(poly1), centroid, 157, 202)
    kept.extend(_points_in_bearing_range(_flatten_rings(poly2), centroid, 157, 202))
    return kept


def get_level3_coordinates(coordinates, level_3, level1):
    """Compute the band of points at a given distance and direction from a region.

    The region polygon is buffered twice (``0.0095 * kms`` and
    ``0.013 * kms``; presumably degrees, i.e. roughly 1 km ≈ 0.009° — TODO
    confirm) and the difference of the two buffers forms the band. When a
    direction is given, the band is restricted to that direction.

    :param coordinates: pair ``(ring, centroid)`` describing the region.
    :param level_3: distance string such as ``"4 km"``.
    :param level1: direction label, or ``None`` to keep the whole band.
    :return: tuple ``(coord, center)``: the selected points and their
        centroid; ``center`` falls back to the input centroid when no point
        was selected.
    """
    distance, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(distance, unit)

    region = gpd.GeoSeries(Polygon(coordinates[0]))
    inner = region.buffer(0.0095 * kms, join_style=2)
    outer = region.buffer(0.013 * kms, join_style=2)
    coord = _flatten_rings(outer.difference(inner))

    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West is recomputed from both buffers with the fixed bearing window.
            coord = sort_west(outer, inner, coordinates[1])

    # Centroid of the selected points.
    if coord:
        center_point = MultiPoint(coord).centroid
        new_center = (center_point.x, center_point.y)
    else:
        new_center = coordinates[1]  # fallback: the original centroid
    return coord, new_center
# level 3 end


# between
def get_between_coordinates(coordinates1, coordinates2):
    """Build a circle centred halfway between two regions.

    The circle is centred on the midpoint of the two centroids and its area
    is derived from the mean of the two polygon areas.

    :param coordinates1: pair ``(ring, centroid)`` of the first region.
    :param coordinates2: pair ``(ring, centroid)`` of the second region.
    :return: tuple ``(circle_points, midpoint)`` where ``circle_points`` is a
        list of 100 ``(lon, lat)`` points and ``midpoint`` is the centre.
    """
    # Areas are in squared coordinate units (approximation; no projection).
    area1 = Polygon(coordinates1[0]).area
    area2 = Polygon(coordinates2[0]).area
    avg_area = (area1 + area2) / 2

    # Radius of the equal-area circle, scaled to kilometres.
    r_km = np.sqrt(avg_area / np.pi) * 111.32

    # Circle centre: midpoint of the two centroids.
    midpoint = ((coordinates1[1][0] + coordinates2[1][0]) / 2,
                (coordinates1[1][1] + coordinates2[1][1]) / 2)

    # Approximate length of one degree on the ground.
    lat_km = 111.32  # 1 degree of latitude ≈ 111.32 km
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))  # shrinks with cos(latitude)

    circle_points = []
    for theta in np.linspace(0, 360, num=100):  # 100 points along the circle
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        circle_points.append((midpoint[0] + d_lon, midpoint[1] + d_lat))
    return circle_points, midpoint
# between end


def llmapi(text, system_prompt=SYSTEM_PROMPT_EN):
    """Ask the LLM to plan positioning steps for ``text``.

    :param text: natural-language location description (the user message).
    :param system_prompt: system prompt to send; defaults to the English one.
    :return: the parsed JSON step list from the delimited answer section.
    :raises ValueError: if the reply has no delimited JSON section (or the
        section is not valid JSON, via ``json.JSONDecodeError``).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    # The content can be None; normalise so the regex search cannot crash.
    result = chat_completion.choices[0].message.content or ""
    print(result)
    json_match = _JSON_BLOCK_RE.search(result)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))


def execute_steps(steps):
    """Run LLM-planned steps in order and return every step's result.

    Integer inputs refer to the ``id`` of an earlier step and are replaced
    by that step's result; string locations are geocoded on demand.

    :param steps: list of dicts with ``id``, ``function`` and ``inputs``.
    :return: dict mapping step id to the step's ``(points, center)`` result.
    :raises KeyError: if a step refers to an id with no stored result.
    """
    data = {}
    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        resolved_inputs = [
            data[inp] if isinstance(inp, int) else inp
            for inp in inputs
        ]

        if function == "Relative":
            location, direction, distance = resolved_inputs
            if isinstance(location, str):
                location = get_coordinates(location)
            result = get_level3_coordinates(location, distance, direction)
            print(location, distance, direction, 'rrr')
            print(result)
            data[step_id] = result
        elif function == "Between":
            location1, location2 = resolved_inputs
            if isinstance(location1, str):
                location1 = get_coordinates(location1)
                location1 = [location1[0]] + list(location1[1:])
            if isinstance(location2, str):
                location2 = get_coordinates(location2)
                print(location2)
                location2 = [location2[0]] + list(location2[1:])
            print(location1)
            result = get_between_coordinates(location1, location2)
            print(location1, location2, 'bbb')
            print(result)
            data[step_id] = result
        else:
            # Previously skipped without a trace; keep skipping but say so.
            print(f"Skipping step {step_id}: unknown function {function!r}")
    return data


# a = get_coordinates('Burwood')
# a2 = get_coordinates('Glebe')
# b = get_level3_coordinates(a, '5 km', 'east')
# c = get_between_coordinates(a, a2)

# Full pipeline
# Default input (each assignment overrides the previous one; the last wins)
default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."
answer_path = 'answer/GPT4o.json'


def load_answers(path):
    """Return the answers already stored at ``path``.

    :param path: path of the JSON answers file.
    :return: the parsed JSON content, or an empty list when the file does
        not exist, is empty, or does not contain valid JSON.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, e.g.
        # an empty or half-written file left behind by an interrupted run.
        return []


# Format conversion
def write_custom_json(data, filename):
    """Write ``data`` as JSON with every step on a single line.

    Each item must provide ``index``, ``instruction`` and ``steps``; each
    step must provide ``id``, ``function`` and ``inputs``. Every value is
    serialised with ``json.dumps`` so quotes and non-ASCII text cannot
    produce invalid JSON.

    :param data: list of answer items.
    :param filename: destination path; the file is overwritten.
    """
    def format_step(step):
        step_id = json.dumps(step["id"], ensure_ascii=False)
        function = json.dumps(step["function"], ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

    last = len(data) - 1
    with open(filename, "w", encoding="utf-8") as f:
        f.write("[\n")
        for position, item in enumerate(data):
            index = json.dumps(item["index"], ensure_ascii=False)
            instruction = json.dumps(item["instruction"], ensure_ascii=False)
            step_lines = [f" {format_step(step)}" for step in item["steps"]]
            f.write(" {\n")
            f.write(f' "index": {index},\n')
            f.write(f' "instruction": {instruction},\n')
            f.write(' "steps": [\n')
            f.write(",\n".join(step_lines))
            f.write("\n ]\n")
            f.write(" }" + (",\n" if position < last else "\n"))
        f.write("]\n")


if __name__ == "__main__":
    # Guarded so that importing this module does not trigger LLM calls.
    import os

    with open(dataset_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Make a first run work without a pre-created answers file.
    os.makedirs(os.path.dirname(answer_path) or ".", exist_ok=True)
    datapoint = load_answers(answer_path)

    for i in data:
        # Example of a hand-written plan, useful for offline debugging:
        # parsed_steps = [{"id": 1, "function": "Between", "inputs": ["Chatswood", "North Sydney"]},{"id": 2, "function": "Between", "inputs": [1, "North Sydney"]}]
        i["steps"] = llmapi(i['instruction'])
        datapoint.append(i)
        # Persist after every item so a failed API call loses no finished work.
        with open(answer_path, "w", encoding="utf-8") as f:
            json.dump(datapoint, f, ensure_ascii=False, indent=2)

    # Regenerate custom formatted JSON
    data = load_answers(answer_path)
    write_custom_json(data, answer_path)