File size: 9,246 Bytes
57746f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
import os
import numpy as np
import cv2
import torch
import torch.multiprocessing as mp
import shutil
def process_scene_on_gpu(gpu_id, scene_names, data_root, target_root, output_queue):
    """Process a share of scenes on one GPU and report the results to the parent.

    For each scene, the per-scene result is stored in
    ``<target_root>/<scene_name>/scene_data.npz``. Scenes whose result file
    already exists are not recomputed; their cached result is loaded instead
    so the aggregated output still covers them.

    Args:
        gpu_id: Index of the CUDA device this worker should use.
        scene_names: Names of the scenes assigned to this worker.
        data_root: Directory containing the source scenes.
        target_root: Directory receiving the processed scenes.
        output_queue: Queue on which exactly one
            ``(pairs_by_scene, images_by_scene)`` tuple is put. Both dicts
            are keyed by scene name.
    """
    local_pairs = {}
    local_images = {}
    try:
        torch.cuda.set_device(gpu_id)
        for scene_name in scene_names:
            save_path = os.path.join(target_root, scene_name, "scene_data.npz")
            if os.path.exists(save_path):
                print(f"Scene {scene_name} already processed, skipping")
                # Reuse the cached result so the parent's aggregate is complete.
                with np.load(save_path) as cached:
                    local_pairs[scene_name] = cached["pairs"]
                    local_images[scene_name] = cached["images"]
                continue
            pairs, images = process_scene(data_root, target_root, scene_name)
            np.savez_compressed(save_path, pairs=pairs, images=images)
            local_pairs[scene_name] = pairs
            local_images[scene_name] = images
    finally:
        # Always report (possibly partial) results: the parent waits for one
        # message per worker and would otherwise block forever on a failure.
        output_queue.put((local_pairs, local_images))
def preprocess_scannetpp(data_root, target_root):
    """Preprocess every scene under ``data_root`` using all available GPUs.

    Scene directories are distributed round-robin across the visible CUDA
    devices, one worker process per device. The per-worker results are merged
    and written to ``scannet_image_pairs.npz`` and ``scannet_images.npz``
    inside ``data_root``.

    Args:
        data_root: Directory whose sub-directories are the scenes to process.
        target_root: Directory receiving the processed scenes.

    Returns:
        A tuple ``(all_pairs, all_images)`` of dicts keyed by scene name.

    Raises:
        RuntimeError: If no CUDA device is available. Without one no scene
            can be processed, and empty aggregate files would be written.
    """
    from queue import Empty  # raised by Queue.get(timeout=...)

    # Traverse all the folders in the data_root
    scene_names = [
        folder
        for folder in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, folder))
    ]

    num_gpus = torch.cuda.device_count()
    if num_gpus == 0:
        raise RuntimeError(
            "No CUDA devices available: ScanNet++ preprocessing requires at least one GPU"
        )

    # Distribute scenes across GPUs
    scenes_per_gpu = [scene_names[i::num_gpus] for i in range(num_gpus)]

    # CUDA cannot be safely used in forked children once the parent has
    # touched the CUDA runtime, so the workers are started with "spawn".
    ctx = mp.get_context("spawn")
    output_queue = ctx.Queue()

    processes = []
    for gpu_id, gpu_scenes in enumerate(scenes_per_gpu):
        p = ctx.Process(
            target=process_scene_on_gpu,
            args=(gpu_id, gpu_scenes, data_root, target_root, output_queue),
        )
        p.start()
        processes.append(p)

    # Collect one result per worker. Poll with a timeout so that a worker
    # dying without reporting cannot block this process forever.
    all_pairs = {}
    all_images = {}
    received = 0
    workers_done = False
    while received < len(processes):
        try:
            local_pairs, local_images = output_queue.get(timeout=1.0)
        except Empty:
            if workers_done:
                # All workers had already exited before this last attempt,
                # so nothing more can arrive.
                break
            workers_done = not any(p.is_alive() for p in processes)
            continue
        all_pairs.update(local_pairs)
        all_images.update(local_images)
        received += 1

    # Wait for all processes to complete
    for p in processes:
        p.join()

    failed = [p for p in processes if p.exitcode != 0]
    if failed or received < len(processes):
        print(f"Warning: {len(failed)} worker process(es) exited abnormally; results may be incomplete")

    # Save to npz file
    np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
    np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)

    # Sum up the number of image pairs over all scenes
    total_pairs = sum(len(pairs) for pairs in all_pairs.values())
    print(f"Total number of image pairs: {total_pairs}")
    return all_pairs, all_images
# Legacy single-process variant of preprocess_scannetpp, kept for reference
# only (the active definition is the multi-GPU function of the same name):
#
# def preprocess_scannetpp(data_root, target_root):
#     # Traverse all the folders in the data_root
#     scene_names = [folder for folder in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, folder))]
#     for scene_name in scene_names:
#         save_path = os.path.join(target_root, scene_name, "scene_data.npz")
#         if os.path.exists(save_path):
#             print(f"Scene {scene_name} already processed, skipping")
#             continue
#         pairs, images = process_scene(data_root, target_root, scene_name)
#         np.savez_compressed(save_path, pairs=pairs, images=images)
def process_scene(data_root, target_root, scene_name):
    """Copy one scene into ``target_root`` and compute its image pairs.

    Every ``.JPG`` image of the scene is copied together with its depth map
    (``depths/<id>.png``) and its camera file (``images/<id>.npz``). Images
    whose camera-to-world matrix contains NaN or infinite values are reported
    and excluded before pairs are generated.

    Args:
        data_root: Directory containing the source scenes.
        target_root: Directory receiving the copied scene files.
        scene_name: Name of the scene sub-directory.

    Returns:
        A tuple ``(pairs, valid_images)``: the result of
        ``generate_image_pairs`` for the valid images, and the sorted list of
        valid image ids.
    """
    src_images_dir = os.path.join(data_root, scene_name, "images")
    src_depths_dir = os.path.join(data_root, scene_name, "depths")
    dst_images_dir = os.path.join(target_root, scene_name, "images")
    dst_depths_dir = os.path.join(target_root, scene_name, "depths")

    # Image ids are the ".JPG" file names without their extension.
    images = sorted(
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(src_images_dir)
        if file_name.endswith(".JPG")
    )

    # Copy images, depths, and camera parameters to target_root.
    os.makedirs(dst_images_dir, exist_ok=True)
    os.makedirs(dst_depths_dir, exist_ok=True)
    for image in images:
        copy_jobs = (
            (src_images_dir, dst_images_dir, f"{image}.JPG"),
            (src_depths_dir, dst_depths_dir, f"{image}.png"),
            (src_images_dir, dst_images_dir, f"{image}.npz"),
        )
        for src_dir, dst_dir, file_name in copy_jobs:
            shutil.copy(os.path.join(src_dir, file_name), os.path.join(dst_dir, file_name))

    # Keep only the images whose camera pose is finite.
    valid_images = []
    for image in images:
        _depth, c2w, _intrinsics = load_image(data_root, scene_name, image)
        if not is_valid_c2w(c2w):
            print(f"Invalid c2w for image {image} in scene {scene_name}")
            continue
        valid_images.append(image)

    # Generate image pairs: window of 100 images, 5 IoU sub-intervals.
    pairs = generate_image_pairs(data_root, scene_name, valid_images, 100, 5)
    print(f"Scene {scene_name} has {len(pairs)} image pairs and {len(valid_images)} valid images out of {len(images)} total images")
    return pairs, valid_images
def is_valid_c2w(c2w):
    """Return True when no entry of ``c2w`` is NaN or infinite."""
    every_entry_finite = np.isfinite(c2w).all()
    return bool(every_entry_finite)
def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
    """Select image pairs whose mean IoU covers a spread of overlap levels.

    The IoU range [0.3, 0.8] is split into ``num_sub_intervals`` equal
    sub-intervals. For each image ``i``, the following images inside the
    sliding window are scanned in order, and at most one pair per
    sub-interval is kept: the first ``j`` whose mean IoU (the average of the
    IoU computed in both directions) falls into that sub-interval.

    Args:
        data_root: Directory containing the source scenes.
        scene_name: Name of the scene sub-directory.
        images: Image ids of the scene, in the order they should be scanned.
        slide_window: Image ``i`` is compared with images ``i + 1`` up to
            ``i + slide_window - 1``.
        num_sub_intervals: Number of equal IoU sub-intervals.

    Returns:
        A list of ``(i, j, mean_iou)`` tuples, where ``i`` and ``j`` are
        indices into ``images``.
    """
    pairs = []
    n = len(images)

    # Define IoU sub-intervals
    iou_range = (0.3, 0.8)
    sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
    sub_intervals = [
        (iou_range[0] + k * sub_interval_size, iou_range[0] + (k + 1) * sub_interval_size)
        for k in range(num_sub_intervals)
    ]

    for i in range(n):
        window_end = min(i + slide_window, n)
        if i + 1 >= window_end:
            # No candidate partner for this image; avoid a pointless load.
            continue

        # The reference image is the same for every j, so load it only once.
        depth1, c2w1, K1 = load_image(data_root, scene_name, images[i])

        # Keep track of whether a pair has been added for each sub-interval
        interval_selected = [False] * num_sub_intervals
        for j in range(i + 1, window_end):
            # Break early if all sub-intervals have been selected
            if all(interval_selected):
                break

            depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])

            # The mean is computed inside the try block so that a non-numeric
            # failure value from calculate_iou skips the pair instead of
            # aborting the whole scene.
            try:
                iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
                iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
                mean_iou = (iou_1 + iou_2) / 2
            except Exception as e:
                print(f"Error calculating IoU for images {images[i]} and {images[j]} in scene {scene_name}: {str(e)}")
                continue

            # Check which sub-interval the mean IoU falls into
            for idx, (lower, upper) in enumerate(sub_intervals):
                if lower <= mean_iou <= upper and not interval_selected[idx]:
                    pairs.append((i, j, mean_iou))
                    interval_selected[idx] = True  # Mark this interval as selected
                    break  # A pair is recorded in at most one sub-interval
    return pairs
def load_image(data_root, scene_name, image_id):
    """Load the depth map and camera parameters of one image.

    Args:
        data_root: Directory containing the source scenes.
        scene_name: Name of the scene sub-directory.
        image_id: Image id, i.e. the file name without extension.

    Returns:
        A tuple ``(depth, c2w, K)``: the depth map as float32 divided by 1000
        (presumably millimetres to metres -- confirm against the dataset),
        and the ``camera_pose`` and ``camera_intrinsics`` arrays stored in
        the image's ``.npz`` file.

    Raises:
        FileNotFoundError: If the depth map is missing or cannot be decoded.
    """
    # load depthmap
    depth_path = f"{data_root}/{scene_name}/depths/{image_id}.png"
    raw_depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
    if raw_depth is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read depth map: {depth_path}")
    depth = raw_depth.astype(np.float32) / 1000.0

    # load camera parameters; the context manager closes the archive handle
    meta_path = f"{data_root}/{scene_name}/images/{image_id}.npz"
    with np.load(meta_path) as meta:
        c2w = meta['camera_pose']
        K = meta['camera_intrinsics']
    return depth, c2w, K
# Unproject depthmap to point cloud and project to another camera
def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2):
    """Estimate the visual overlap of image 1 with image 2 as an IoU score.

    Every pixel of image 1 with a valid depth is unprojected to a 3D point,
    moved to world coordinates and projected into camera 2. A point counts as
    visible when it lies in front of camera 2 and lands inside image 2. It
    counts as overlapping when its depth in camera 2 is within 0.1 of the
    depth stored in ``depth2`` at that pixel.

    ``IoU = overlapping / (visible + valid pixels of depth2 - overlapping)``

    Args:
        depth1: 2D numpy depth map of image 1; values <= 0 are ignored.
        c2w1: Camera-to-world matrix of image 1 (rotation in ``[:3, :3]``,
            translation in ``[:3, 3]``).
        K1: 3x3 intrinsics matrix of image 1.
        depth2: 2D numpy depth map of image 2.
        c2w2: Camera-to-world matrix of image 2.
        K2: 3x3 intrinsics matrix of image 2.

    Returns:
        The IoU as a Python float. ``0.0`` is returned when the rotation of
        ``c2w2`` is singular or when the union is empty.
    """
    # Use the current CUDA device when there is one, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def _to_tensor(array):
        return torch.from_numpy(array).to(device).float()

    depth1, depth2 = _to_tensor(depth1), _to_tensor(depth2)
    c2w1, c2w2 = _to_tensor(c2w1), _to_tensor(c2w2)
    K1, K2 = _to_tensor(K1), _to_tensor(K2)

    # A singular rotation cannot be inverted: report "no overlap" using the
    # same return type as every other path.
    rot2 = c2w2[:3, :3]
    if torch.det(rot2) == 0:
        return 0.0

    h1, w1 = depth1.shape
    h2, w2 = depth2.shape

    # Homogeneous pixel coordinates of image 1, shape (3, h1 * w1)
    y, x = torch.meshgrid(
        torch.arange(h1, device=device, dtype=torch.float32),
        torch.arange(w1, device=device, dtype=torch.float32),
        indexing="ij",
    )
    x_flat, y_flat = x.flatten(), y.flatten()
    pixels = torch.stack((x_flat, y_flat, torch.ones_like(x_flat)), dim=0)

    # Unproject pixels to 3D points in camera 1, then to world coordinates
    depth1_flat = depth1.flatten()
    points_cam1 = (torch.linalg.inv(K1) @ pixels) * depth1_flat.unsqueeze(0)
    points_world = c2w1[:3, :3] @ points_cam1 + c2w1[:3, 3:4]

    # Project world points into camera 2
    points_cam2 = torch.linalg.inv(rot2) @ (points_world - c2w2[:3, 3:4])
    depth_in_cam2 = points_cam2[2]
    projected = K2 @ points_cam2
    coords = (projected[:2] / projected[2]).T

    # Keep real points (valid source depth) that are in front of camera 2
    # and fall inside image 2. NaN coordinates fail every comparison and are
    # therefore dropped as well.
    valid_mask = (
        (depth1_flat > 0)
        & (depth_in_cam2 > 0)
        & (coords[:, 0] >= 0) & (coords[:, 0] < w2)
        & (coords[:, 1] >= 0) & (coords[:, 1] < h2)
    )
    coords_valid = coords[valid_mask].long()

    # Compare depths
    projected_depth = depth_in_cam2[valid_mask]
    actual_depth = depth2[coords_valid[:, 1], coords_valid[:, 0]]
    depth_threshold = 0.1  # 10cm threshold
    overlap_mask = torch.abs(projected_depth - actual_depth) < depth_threshold

    # Calculate IoU
    intersection = torch.sum(overlap_mask)
    union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection
    if union <= 0:
        return 0.0
    return (intersection.float() / union.float()).item()
if __name__ == "__main__":
    # Script entry point: preprocess the scenes found under data_root and
    # write the processed copies under target_root.
    data_root = "data/scannetpp_processed"
    target_root = "data/scannetpp_target"
    preprocess_scannetpp(data_root=data_root, target_root=target_root)
|