import os
import shutil

import cv2
import numpy as np
import torch
import torch.multiprocessing as mp


def process_scene_on_gpu(gpu_id, scene_names, data_root, target_root, output_queue):
    torch.cuda.set_device(gpu_id)
    local_pairs = {}
    local_images = {}
    for scene_name in scene_names:
        save_path = os.path.join(target_root, scene_name, "scene_data.npz")
        if os.path.exists(save_path):
            # Reuse previously computed results so the aggregate stays complete
            print(f"Scene {scene_name} already processed, loading cached results")
            cached = np.load(save_path, allow_pickle=True)
            local_pairs[scene_name] = cached["pairs"]
            local_images[scene_name] = cached["images"]
            continue
        pairs, images = process_scene(data_root, target_root, scene_name)
        np.savez_compressed(save_path, pairs=pairs, images=images)
        local_pairs[scene_name] = pairs
        local_images[scene_name] = images
    output_queue.put((local_pairs, local_images))


def preprocess_scannetpp(data_root, target_root):
    # Traverse all the scene folders in data_root
    scene_names = [folder for folder in os.listdir(data_root)
                   if os.path.isdir(os.path.join(data_root, folder))]

    # Distribute scenes round-robin across the available GPUs
    num_gpus = torch.cuda.device_count()
    scenes_per_gpu = [scene_names[i::num_gpus] for i in range(num_gpus)]

    # Create a multiprocessing queue to collect per-process results
    output_queue = mp.Queue()

    # Launch one worker process per GPU
    processes = []
    for gpu_id in range(num_gpus):
        p = mp.Process(target=process_scene_on_gpu,
                       args=(gpu_id, scenes_per_gpu[gpu_id], data_root, target_root, output_queue))
        p.start()
        processes.append(p)

    # Drain the queue before joining so workers never block on a full queue
    all_pairs = {}
    all_images = {}
    for _ in range(num_gpus):
        local_pairs, local_images = output_queue.get()
        all_pairs.update(local_pairs)
        all_images.update(local_images)

    # Wait for all processes to complete
    for p in processes:
        p.join()

    # Save the aggregated results, one entry per scene
    np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
    np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)

    # Report the total number of image pairs across all scenes
    total_pairs = sum(len(pairs) for pairs in all_pairs.values())
    print(f"Total number of image pairs: {total_pairs}")
    return all_pairs, all_images
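# A minimal sketch of the round-robin split used above (illustrative only):
# with scene_names = [s0, s1, s2, s3, s4] and num_gpus = 2,
#   scene_names[0::2] -> GPU 0 processes [s0, s2, s4]
#   scene_names[1::2] -> GPU 1 processes [s1, s3]
# Each worker puts exactly one (pairs, images) message on the queue, so the
# parent reads num_gpus messages before join() to avoid a deadlock when the
# queue's buffer fills up.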
def process_scene(data_root, target_root, scene_name):
    images_dir = os.path.join(data_root, scene_name, "images")
    images = [os.path.splitext(file)[0] for file in os.listdir(images_dir)
              if file.endswith(".JPG")]
    images.sort()

    # Copy images, depth maps, and camera parameters to target_root
    os.makedirs(os.path.join(target_root, scene_name, "images"), exist_ok=True)
    os.makedirs(os.path.join(target_root, scene_name, "depths"), exist_ok=True)
    for image in images:
        shutil.copy(os.path.join(data_root, scene_name, "images", f"{image}.JPG"),
                    os.path.join(target_root, scene_name, "images", f"{image}.JPG"))
        shutil.copy(os.path.join(data_root, scene_name, "depths", f"{image}.png"),
                    os.path.join(target_root, scene_name, "depths", f"{image}.png"))
        shutil.copy(os.path.join(data_root, scene_name, "images", f"{image}.npz"),
                    os.path.join(target_root, scene_name, "images", f"{image}.npz"))

    # Keep only images whose camera-to-world pose is finite
    valid_images = []
    for image in images:
        _, c2w, _ = load_image(data_root, scene_name, image)
        if is_valid_c2w(c2w):
            valid_images.append(image)
        else:
            print(f"Invalid c2w for image {image} in scene {scene_name}")

    # Generate image pairs
    slide_window = 100
    num_sub_intervals = 5
    pairs = generate_image_pairs(data_root, scene_name, valid_images, slide_window, num_sub_intervals)
    print(f"Scene {scene_name} has {len(pairs)} image pairs and {len(valid_images)} valid images "
          f"out of {len(images)} total images")
    return pairs, valid_images


def is_valid_c2w(c2w):
    return not np.any(np.isinf(c2w)) and not np.any(np.isnan(c2w))


def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
    pairs = []
    n = len(images)

    # Split the target IoU range into equal sub-intervals so pairs are sampled
    # across a spread of overlap ratios
    iou_range = (0.3, 0.8)
    sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
    sub_intervals = [(iou_range[0] + i * sub_interval_size,
                      iou_range[0] + (i + 1) * sub_interval_size)
                     for i in range(num_sub_intervals)]

    for i in range(n):
        # Load the reference image once per window rather than once per candidate
        depth1, c2w1, K1 = load_image(data_root, scene_name, images[i])

        # Keep track of whether a pair has been added for each sub-interval
        interval_selected = [False] * num_sub_intervals
        for j in range(i + 1, min(i + slide_window, n)):
            # Break early if all sub-intervals have been selected
            if all(interval_selected):
                break

            depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])

            # Symmetrize the overlap by averaging the IoU in both directions
            try:
                iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
                iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
            except Exception as e:
                print(f"Error calculating IoU for images {images[i]} and {images[j]} "
                      f"in scene {scene_name}: {str(e)}")
                continue
            mean_iou = (iou_1 + iou_2) / 2

            # Assign the pair to the first unfilled sub-interval it falls into,
            # then move on to the next candidate
            for idx, (lower, upper) in enumerate(sub_intervals):
                if lower <= mean_iou <= upper and not interval_selected[idx]:
                    pairs.append((i, j, mean_iou))
                    interval_selected[idx] = True
                    break
    return pairs


def load_image(data_root, scene_name, image_id):
    # Load depth map (stored in millimeters, converted to meters)
    depth_path = os.path.join(data_root, scene_name, "depths", f"{image_id}.png")
    depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0

    # Load camera parameters
    meta_path = os.path.join(data_root, scene_name, "images", f"{image_id}.npz")
    meta = np.load(meta_path)
    c2w = meta['camera_pose']
    K = meta['camera_intrinsics']
    return depth, c2w, K
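# Per-pixel geometry implemented by calculate_iou below, written out for a
# single pixel (u, v) with depth d (an illustrative sketch, not extra API):
#
#   p_cam1  = d * inv(K1) @ [u, v, 1]^T                      # unproject from image 1
#   p_world = c2w1[:3, :3] @ p_cam1 + c2w1[:3, 3]            # camera 1 -> world
#   p_cam2  = inv(c2w2[:3, :3]) @ (p_world - c2w2[:3, 3])    # world -> camera 2
#   (u2, v2) = (K2 @ p_cam2)[:2] / (K2 @ p_cam2)[2]          # project into image 2
#
# A pixel counts toward the intersection when (u2, v2) lands inside image 2
# and the projected depth p_cam2[2] agrees with depth2[v2, u2] within 10 cm.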
# Unproject a depth map to a point cloud and project it into the other camera
def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2):
    # Move data to GPU and ensure float32 dtype
    depth1 = torch.from_numpy(depth1).cuda().float()
    depth2 = torch.from_numpy(depth2).cuda().float()
    c2w1 = torch.from_numpy(c2w1).cuda().float()
    c2w2 = torch.from_numpy(c2w2).cuda().float()
    K1 = torch.from_numpy(K1).cuda().float()
    K2 = torch.from_numpy(K2).cuda().float()

    # Get image dimensions
    h, w = depth1.shape

    # Create homogeneous pixel coordinates, shape (3, h*w)
    y, x = torch.meshgrid(torch.arange(h, device='cuda', dtype=torch.float32),
                          torch.arange(w, device='cuda', dtype=torch.float32),
                          indexing='ij')
    pixels = torch.stack((x.flatten(), y.flatten(), torch.ones_like(x.flatten())), dim=-1).T

    # Unproject pixels to 3D points in camera 1 coordinates
    pixels_3d = torch.linalg.inv(K1) @ pixels
    pixels_3d *= depth1.flatten().unsqueeze(0)

    # Transform 3D points to world coordinates
    pixels_world = c2w1[:3, :3] @ pixels_3d + c2w1[:3, 3:4]

    # Bail out if the rotation of camera 2 is not invertible
    if torch.det(c2w2[:3, :3]) == 0:
        return 0.0

    # Project world points into the second camera
    pixels_cam2 = torch.linalg.inv(c2w2[:3, :3]) @ (pixels_world - c2w2[:3, 3:4])
    pixels_img2 = K2 @ pixels_cam2

    # Normalize homogeneous coordinates
    pixels_img2 = pixels_img2[:2] / pixels_img2[2]
    pixels_img2 = pixels_img2.T

    # Keep points that land inside image 2 and lie in front of camera 2
    # (the positive-depth check filters points behind the camera, which can
    # otherwise project inside the image bounds with a flipped sign)
    valid_mask = (pixels_img2[:, 0] >= 0) & (pixels_img2[:, 0] < w) & \
                 (pixels_img2[:, 1] >= 0) & (pixels_img2[:, 1] < h) & \
                 (pixels_cam2[2] > 0)
    pixels_img2 = pixels_img2[valid_mask].long()

    # A projected point overlaps if its depth matches the observed depth in image 2
    projected_depth = pixels_cam2[2, valid_mask]
    actual_depth = depth2[pixels_img2[:, 1], pixels_img2[:, 0]]
    depth_diff = torch.abs(projected_depth - actual_depth)
    depth_threshold = 0.1  # 10 cm threshold
    overlap_mask = depth_diff < depth_threshold

    # Calculate IoU between the projected footprint and the valid pixels of image 2
    intersection = torch.sum(overlap_mask)
    union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection
    iou = intersection.float() / union.float() if union > 0 else torch.tensor(0.0, device='cuda')
    return iou.item()


if __name__ == "__main__":
    # CUDA tensors in worker processes require the 'spawn' start method
    mp.set_start_method('spawn', force=True)
    data_root = "data/scannetpp_processed"
    target_root = "data/scannetpp_target"
    preprocess_scannetpp(data_root, target_root)
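# Expected on-disk layout (the paths below are the defaults from __main__;
# adjust to your setup):
#   data/scannetpp_processed/<scene>/images/<id>.JPG   RGB frames
#   data/scannetpp_processed/<scene>/images/<id>.npz   'camera_pose' (c2w) and
#                                                      'camera_intrinsics' (K)
#   data/scannetpp_processed/<scene>/depths/<id>.png   depth in millimeters
# Run with (assuming this file is saved as preprocess_scannetpp.py):
#   python preprocess_scannetpp.py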