import gzip
import json
import os
import warnings
from dataclasses import dataclass, field
from typing import List
import cv2
import numpy as np
import pytorch_lightning as pl
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from torch.utils.data import DataLoader, Dataset, IterableDataset
from threestudio import register
from threestudio.data.uncond import (
RandomCameraDataModuleConfig,
RandomCameraDataset,
RandomCameraIterableDataset,
)
from threestudio.utils.config import parse_structured
from threestudio.utils.misc import get_rank
from threestudio.utils.ops import (
get_mvp_matrix,
get_projection_matrix,
get_ray_directions,
get_rays,
)
from threestudio.utils.typing import *
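# CO3D dataset loading for threestudio: parse CO3D frame annotations, load
# images, depth maps, and foreground masks, normalize the cameras, and expose
# map-style / iterable datasets plus a Lightning datamodule.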
def _load_16big_png_depth(depth_png) -> np.ndarray:
with Image.open(depth_png) as depth_pil:
# the image is stored with 16-bit depth but PIL reads it as I (32 bit).
# we cast it to uint16, then reinterpret as float16, then cast to float32
depth = (
np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16)
.astype(np.float32)
.reshape((depth_pil.size[1], depth_pil.size[0]))
)
return depth
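# Load a 16-bit PNG depth map, apply the per-frame CO3D scale adjustment,
# zero out non-finite values, and add a leading channel dimension.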
def _load_depth(path, scale_adjustment) -> np.ndarray:
if not path.lower().endswith(".png"):
raise ValueError('unsupported depth file name "%s"' % path)
d = _load_16big_png_depth(path) * scale_adjustment
d[~np.isfinite(d)] = 0.0
return d[None] # fake feature channel
# Code adapted from https://github.com/eldar/snes/blob/473ff2b1f6/3rdparty/co3d/dataset/co3d_dataset.py
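# First and last indices of the nonzero entries of a 1D array.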
def _get_1d_bounds(arr):
nz = np.flatnonzero(arr)
return nz[0], nz[-1]
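# Tight xywh bounding box around the thresholded mask; the threshold is lowered
# until the box covers more than one pixel.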
def get_bbox_from_mask(mask, thr, decrease_quant=0.05):
# bbox in xywh
masks_for_box = np.zeros_like(mask)
while masks_for_box.sum() <= 1.0:
masks_for_box = (mask > thr).astype(np.float32)
thr -= decrease_quant
if thr <= 0.0:
warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.")
x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2))
y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1))
return x0, y0, x1 - x0, y1 - y0
def get_clamp_bbox(bbox, box_crop_context=0.0, impath=""):
# box_crop_context: rate of expansion for bbox
# returns possibly expanded bbox xyxy as float
# increase box size
if box_crop_context > 0.0:
c = box_crop_context
bbox = bbox.astype(np.float32)
bbox[0] -= bbox[2] * c / 2
bbox[1] -= bbox[3] * c / 2
bbox[2] += bbox[2] * c
bbox[3] += bbox[3] * c
if (bbox[2:] <= 1.0).any():
warnings.warn(f"squashed image {impath}!!")
return None
# bbox[2:] = np.clip(bbox[2:], 2, )
bbox[2:] = np.maximum(bbox[2:], 2)
bbox[2:] += bbox[0:2] + 1 # convert to [xmin, ymin, xmax, ymax]
# +1 because upper bound is not inclusive
return bbox
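# Clip the xyxy box to the image bounds and crop an (H, W, C) array with it.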
def crop_around_box(tensor, bbox, impath=""):
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0.0, tensor.shape[-2])
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0.0, tensor.shape[-3])
bbox = bbox.round().astype(np.longlong)
return tensor[bbox[1] : bbox[3], bbox[0] : bbox[2], ...]
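# Fit an (H, W, C) array into (height, width) preserving aspect ratio, zero-pad
# the remainder, and return the resized image, the scale factor, and a mask of
# valid (non-padded) pixels.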
def resize_image(image, height, width, mode="bilinear"):
if image.shape[:2] == (height, width):
return image, 1.0, np.ones_like(image[..., :1])
image = torch.from_numpy(image).permute(2, 0, 1)
minscale = min(height / image.shape[-2], width / image.shape[-1])
imre = torch.nn.functional.interpolate(
image[None],
scale_factor=minscale,
mode=mode,
align_corners=False if mode == "bilinear" else None,
recompute_scale_factor=True,
)[0]
imre_ = torch.zeros(image.shape[0], height, width)
imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
mask = torch.zeros(1, height, width)
mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0
return imre_.permute(1, 2, 0).numpy(), minscale, mask.permute(1, 2, 0).numpy()
# Code adapted from https://github.com/POSTECH-CVLab/PeRFception/data_util/co3d.py
def similarity_from_cameras(c2w, fix_rot=False, radius=1.0):
"""
Get a similarity transform to normalize dataset
from c2w (OpenCV convention) cameras
    :param c2w: (N, 4, 4) camera-to-world matrices
    :return: T (4, 4), scale (float)
"""
t = c2w[:, :3, 3]
R = c2w[:, :3, :3]
# (1) Rotate the world so that z+ is the up axis
# we estimate the up axis by averaging the camera up axes
ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1)
world_up = np.mean(ups, axis=0)
world_up /= np.linalg.norm(world_up)
up_camspace = np.array([0.0, 0.0, 1.0])
c = (up_camspace * world_up).sum()
cross = np.cross(world_up, up_camspace)
skew = np.array(
[
[0.0, -cross[2], cross[1]],
[cross[2], 0.0, -cross[0]],
[-cross[1], cross[0], 0.0],
]
)
if c > -1:
R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c)
else:
# In the unlikely case the original data has y+ up axis,
# rotate 180-deg about x axis
R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
if fix_rot:
R_align = np.eye(3)
R = np.eye(3)
else:
R = R_align @ R
fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1)
t = (R_align @ t[..., None])[..., 0]
# (2) Recenter the scene using camera center rays
# find the closest point to the origin for each camera's center ray
nearest = t + (fwds * -t).sum(-1)[:, None] * fwds
# median for more robustness
translate = -np.median(nearest, axis=0)
# translate = -np.mean(t, axis=0) # DEBUG
transform = np.eye(4)
transform[:3, 3] = translate
transform[:3, :3] = R_align
# (3) Rescale the scene using camera distances
scale = radius / np.median(np.linalg.norm(t + translate, axis=-1))
return transform, scale
@dataclass
class Co3dDataModuleConfig:
root_dir: str = ""
batch_size: int = 1
height: int = 256
width: int = 256
load_preprocessed: bool = False
cam_scale_factor: float = 0.95
max_num_frames: int = 300
v2_mode: bool = True
use_mask: bool = True
box_crop: bool = True
box_crop_mask_thr: float = 0.4
box_crop_context: float = 0.3
train_num_rays: int = -1
train_views: Optional[list] = None
train_split: str = "train"
val_split: str = "val"
test_split: str = "test"
scale_radius: float = 1.0
use_random_camera: bool = True
random_camera: dict = field(default_factory=dict)
rays_noise_scale: float = 0.0
render_path: str = "circle"
class Co3dDatasetBase:
def setup(self, cfg, split):
self.split = split
self.rank = get_rank()
self.cfg: Co3dDataModuleConfig = cfg
if self.cfg.use_random_camera:
random_camera_cfg = parse_structured(
RandomCameraDataModuleConfig, self.cfg.get("random_camera", {})
)
if split == "train":
self.random_pose_generator = RandomCameraIterableDataset(
random_camera_cfg
)
else:
self.random_pose_generator = RandomCameraDataset(
random_camera_cfg, split
)
self.use_mask = self.cfg.use_mask
cam_scale_factor = self.cfg.cam_scale_factor
assert os.path.exists(self.cfg.root_dir), f"{self.cfg.root_dir} doesn't exist!"
cam_trans = np.diag(np.array([-1, -1, 1, 1], dtype=np.float32))
scene_number = self.cfg.root_dir.split("/")[-1]
json_path = os.path.join(self.cfg.root_dir, "..", "frame_annotations.jgz")
with gzip.open(json_path, "r") as fp:
all_frames_data = json.load(fp)
frame_data, images, intrinsics, extrinsics, image_sizes = [], [], [], [], []
masks = []
depths = []
for temporal_data in all_frames_data:
if temporal_data["sequence_name"] == scene_number:
frame_data.append(temporal_data)
self.all_directions = []
self.all_fg_masks = []
for frame in frame_data:
if "unseen" in frame["meta"]["frame_type"]:
continue
img = cv2.imread(
os.path.join(self.cfg.root_dir, "..", "..", frame["image"]["path"])
)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
# TODO: use estimated depth
depth = _load_depth(
os.path.join(self.cfg.root_dir, "..", "..", frame["depth"]["path"]),
frame["depth"]["scale_adjustment"],
)[0]
H, W = frame["image"]["size"]
image_size = np.array([H, W])
fxy = np.array(frame["viewpoint"]["focal_length"])
cxy = np.array(frame["viewpoint"]["principal_point"])
R = np.array(frame["viewpoint"]["R"])
T = np.array(frame["viewpoint"]["T"])
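            # CO3D v2 stores focal_length / principal_point in NDC units scaled
            # by half the shorter image side; re-express them relative to the
            # half image size so the pixel-space focal and principal point
            # computed below come out correctly.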
if self.cfg.v2_mode:
min_HW = min(W, H)
image_size_half = np.array([W * 0.5, H * 0.5], dtype=np.float32)
scale_arr = np.array([min_HW * 0.5, min_HW * 0.5], dtype=np.float32)
fxy_x = fxy * scale_arr
prp_x = np.array([W * 0.5, H * 0.5], dtype=np.float32) - cxy * scale_arr
cxy = (image_size_half - prp_x) / image_size_half
fxy = fxy_x / image_size_half
scale_arr = np.array([W * 0.5, H * 0.5], dtype=np.float32)
focal = fxy * scale_arr
prp = -1.0 * (cxy - 1.0) * scale_arr
pose = np.eye(4)
pose[:3, :3] = R
pose[:3, 3:] = -R @ T[..., None]
# original camera: x left, y up, z in (Pytorch3D)
# transformed camera: x right, y down, z in (OpenCV)
pose = pose @ cam_trans
intrinsic = np.array(
[
[focal[0], 0.0, prp[0], 0.0],
[0.0, focal[1], prp[1], 0.0],
[0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 1.0],
]
)
if any([np.all(pose == _pose) for _pose in extrinsics]):
continue
image_sizes.append(image_size)
intrinsics.append(intrinsic)
extrinsics.append(pose)
images.append(img)
depths.append(depth)
self.all_directions.append(get_ray_directions(W, H, focal, prp))
# vis_utils.vis_depth_pcd([depth], [pose], intrinsic, [(img * 255).astype(np.uint8)])
if self.use_mask:
mask = np.array(
Image.open(
os.path.join(
self.cfg.root_dir, "..", "..", frame["mask"]["path"]
)
)
)
mask = mask.astype(np.float32) / 255.0 # (h, w)
else:
mask = torch.ones_like(img[..., 0])
self.all_fg_masks.append(mask)
intrinsics = np.stack(intrinsics)
extrinsics = np.stack(extrinsics)
image_sizes = np.stack(image_sizes)
self.all_directions = torch.stack(self.all_directions, dim=0)
self.all_fg_masks = np.stack(self.all_fg_masks, 0)
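        # Drop outlier frames: image sizes more than 10% away from the median
        # size, or camera centers more than 5x the median distance from the
        # median camera center.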
        H_median, W_median = np.median(image_sizes, axis=0)
H_inlier = np.abs(image_sizes[:, 0] - H_median) / H_median < 0.1
W_inlier = np.abs(image_sizes[:, 1] - W_median) / W_median < 0.1
inlier = np.logical_and(H_inlier, W_inlier)
dists = np.linalg.norm(
extrinsics[:, :3, 3] - np.median(extrinsics[:, :3, 3], axis=0), axis=-1
)
med = np.median(dists)
good_mask = dists < (med * 5.0)
inlier = np.logical_and(inlier, good_mask)
if inlier.sum() != 0:
intrinsics = intrinsics[inlier]
extrinsics = extrinsics[inlier]
image_sizes = image_sizes[inlier]
images = [images[i] for i in range(len(inlier)) if inlier[i]]
depths = [depths[i] for i in range(len(inlier)) if inlier[i]]
self.all_directions = self.all_directions[inlier]
self.all_fg_masks = self.all_fg_masks[inlier]
extrinsics = np.stack(extrinsics)
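        # Normalize the scene: rotate so +z is up, recenter on the cameras'
        # center rays, and rescale camera positions and depths by the same
        # factor.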
T, sscale = similarity_from_cameras(extrinsics, radius=self.cfg.scale_radius)
extrinsics = T @ extrinsics
extrinsics[:, :3, 3] *= sscale * cam_scale_factor
depths = [depth * sscale * cam_scale_factor for depth in depths]
num_frames = len(extrinsics)
if self.cfg.max_num_frames < num_frames:
num_frames = self.cfg.max_num_frames
extrinsics = extrinsics[:num_frames]
intrinsics = intrinsics[:num_frames]
image_sizes = image_sizes[:num_frames]
images = images[:num_frames]
depths = depths[:num_frames]
self.all_directions = self.all_directions[:num_frames]
self.all_fg_masks = self.all_fg_masks[:num_frames]
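        # Optionally crop every frame around its foreground-mask bounding box,
        # resize to (height, width), and recompute ray directions with the
        # principal point shifted into the crop.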
if self.cfg.box_crop:
print("cropping...")
crop_masks = []
crop_imgs = []
crop_depths = []
crop_directions = []
crop_xywhs = []
max_sl = 0
for i in range(num_frames):
bbox_xywh = np.array(
get_bbox_from_mask(self.all_fg_masks[i], self.cfg.box_crop_mask_thr)
)
clamp_bbox_xywh = get_clamp_bbox(bbox_xywh, self.cfg.box_crop_context)
max_sl = max(clamp_bbox_xywh[2] - clamp_bbox_xywh[0], max_sl)
max_sl = max(clamp_bbox_xywh[3] - clamp_bbox_xywh[1], max_sl)
mask = crop_around_box(self.all_fg_masks[i][..., None], clamp_bbox_xywh)
img = crop_around_box(images[i], clamp_bbox_xywh)
depth = crop_around_box(depths[i][..., None], clamp_bbox_xywh)
# resize to the same shape
mask, _, _ = resize_image(mask, self.cfg.height, self.cfg.width)
depth, _, _ = resize_image(depth, self.cfg.height, self.cfg.width)
img, scale, _ = resize_image(img, self.cfg.height, self.cfg.width)
fx, fy, cx, cy = (
intrinsics[i][0, 0],
intrinsics[i][1, 1],
intrinsics[i][0, 2],
intrinsics[i][1, 2],
)
crop_masks.append(mask)
crop_imgs.append(img)
crop_depths.append(depth)
crop_xywhs.append(clamp_bbox_xywh)
crop_directions.append(
get_ray_directions(
self.cfg.height,
self.cfg.width,
(fx * scale, fy * scale),
(
(cx - clamp_bbox_xywh[0]) * scale,
(cy - clamp_bbox_xywh[1]) * scale,
),
)
)
# # pad all images to the same shape
# for i in range(num_frames):
# uh = (max_sl - crop_imgs[i].shape[0]) // 2 # h
# dh = max_sl - crop_imgs[i].shape[0] - uh
# lw = (max_sl - crop_imgs[i].shape[1]) // 2
# rw = max_sl - crop_imgs[i].shape[1] - lw
# crop_masks[i] = np.pad(crop_masks[i], pad_width=((uh, dh), (lw, rw), (0, 0)), mode='constant', constant_values=0.)
# crop_imgs[i] = np.pad(crop_imgs[i], pad_width=((uh, dh), (lw, rw), (0, 0)), mode='constant', constant_values=1.)
# crop_depths[i] = np.pad(crop_depths[i], pad_width=((uh, dh), (lw, rw), (0, 0)), mode='constant', constant_values=0.)
# fx, fy, cx, cy = intrinsics[i][0, 0], intrinsics[i][1, 1], intrinsics[i][0, 2], intrinsics[i][1, 2]
# crop_directions.append(get_ray_directions(max_sl, max_sl, (fx, fy), (cx - crop_xywhs[i][0] + lw, cy - crop_xywhs[i][1] + uh)))
# self.w, self.h = max_sl, max_sl
images = crop_imgs
depths = crop_depths
self.all_fg_masks = np.stack(crop_masks, 0)
self.all_directions = torch.from_numpy(np.stack(crop_directions, 0))
# self.width, self.height = self.w, self.h
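        # Convert camera-to-world matrices from OpenCV (x right, y down, z in)
        # to OpenGL (x right, y up, z back) by flipping the y and z axes.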
self.all_c2w = torch.from_numpy(
(
extrinsics
@ np.diag(np.array([1, -1, -1, 1], dtype=np.float32))[None, ...]
)[..., :3, :4]
)
self.all_images = torch.from_numpy(np.stack(images, axis=0))
self.all_depths = torch.from_numpy(np.stack(depths, axis=0))
# self.all_c2w = []
# self.all_images = []
# for i in range(num_frames):
# # convert to: x right, y up, z back (OpenGL)
# c2w = torch.from_numpy(extrinsics[i] @ np.diag(np.array([1, -1, -1, 1], dtype=np.float32)))[:3, :4]
# self.all_c2w.append(c2w)
# img = torch.from_numpy(images[i])
# self.all_images.append(img)
# TODO: save data for fast loading next time
        if self.cfg.load_preprocessed and os.path.exists(
            os.path.join(self.cfg.root_dir, "nerf_preprocessed.npy")
        ):
pass
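        # Build the index splits: by default every 10th frame goes to val, the
        # rest to train, and the test split keeps all frames; with train_views
        # set, those indices form the training set and the remaining frames
        # make up val.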
i_all = np.arange(num_frames)
if self.cfg.train_views is None:
i_test = i_all[::10]
i_val = i_test
            i_train = np.array([i for i in i_all if i not in i_test])
else:
# use provided views
i_train = self.cfg.train_views
            i_test = np.array([i for i in i_all if i not in i_train])
i_val = i_test
if self.split == "train":
print("[INFO] num of train views: ", len(i_train))
print("[INFO] train view ids = ", i_train)
i_split = {"train": i_train, "val": i_val, "test": i_all}
# if self.split == 'test':
# self.all_c2w = create_spheric_poses(self.all_c2w[:,:,3], n_steps=self.cfg.n_test_traj_steps)
# self.all_images = torch.zeros((self.cfg.n_test_traj_steps, self.h, self.w, 3), dtype=torch.float32)
# self.all_fg_masks = torch.zeros((self.cfg.n_test_traj_steps, self.h, self.w), dtype=torch.float32)
# self.directions = self.directions[0].to(self.rank)
# else:
self.all_images, self.all_c2w = (
self.all_images[i_split[self.split]],
self.all_c2w[i_split[self.split]],
)
self.all_directions = self.all_directions[i_split[self.split]].to(self.rank)
self.all_fg_masks = torch.from_numpy(self.all_fg_masks)[i_split[self.split]]
self.all_depths = self.all_depths[i_split[self.split]]
# if render_random_pose:
# render_poses = random_pose(extrinsics[i_all], 50)
# elif render_scene_interp:
# render_poses = pose_interp(extrinsics[i_all], interp_fac)
# render_poses = spherical_poses(sscale * cam_scale_factor * np.eye(4))
# near, far = 0., 1.
# ndc_coeffs = (-1., -1.)
self.all_c2w, self.all_images, self.all_fg_masks = (
self.all_c2w.float().to(self.rank),
self.all_images.float().to(self.rank),
self.all_fg_masks.float().to(self.rank),
)
# self.all_c2w, self.all_images, self.all_fg_masks = \
# self.all_c2w.float(), \
# self.all_images.float(), \
# self.all_fg_masks.float()
self.all_depths = self.all_depths.float().to(self.rank)
def get_all_images(self):
return self.all_images
class Co3dDataset(Dataset, Co3dDatasetBase):
def __init__(self, cfg, split):
self.setup(cfg, split)
def __len__(self):
if self.split == "test":
if self.cfg.render_path == "circle":
return len(self.random_pose_generator)
else:
return len(self.all_images)
else:
return len(self.random_pose_generator)
# return len(self.all_images)
def prepare_data(self, index):
# prepare batch data here
c2w = self.all_c2w[index]
light_positions = c2w[..., :3, -1]
directions = self.all_directions[index]
rays_o, rays_d = get_rays(
directions, c2w, keepdim=True, noise_scale=self.cfg.rays_noise_scale
)
rgb = self.all_images[index]
depth = self.all_depths[index]
mask = self.all_fg_masks[index]
# TODO: get projection matrix and mvp matrix
# proj_mtx = get_projection_matrix()
batch = {
"rays_o": rays_o,
"rays_d": rays_d,
"mvp_mtx": 0,
"camera_positions": c2w[..., :3, -1],
"light_positions": light_positions,
"elevation": 0,
"azimuth": 0,
"camera_distances": 0,
"rgb": rgb,
"depth": depth,
"mask": mask,
}
# c2w = self.all_c2w[index]
# return {
# 'index': index,
# 'c2w': c2w,
# 'light_positions': c2w[:3, -1],
# 'H': self.h,
# 'W': self.w
# }
return batch
def __getitem__(self, index):
if self.split == "test":
if self.cfg.render_path == "circle":
return self.random_pose_generator[index]
else:
return self.prepare_data(index)
else:
return self.random_pose_generator[index]
class Co3dIterableDataset(IterableDataset, Co3dDatasetBase):
def __init__(self, cfg, split):
self.setup(cfg, split)
self.idx = 0
self.image_perm = torch.randperm(len(self.all_images))
def __iter__(self):
while True:
yield {}
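    # __iter__ only yields placeholders; the real batch (one training view plus
    # an optional random-camera batch) is assembled here from the permuted
    # image order.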
def collate(self, batch) -> Dict[str, Any]:
idx = self.image_perm[self.idx]
# prepare batch data here
c2w = self.all_c2w[idx][None]
light_positions = c2w[..., :3, -1]
directions = self.all_directions[idx][None]
rays_o, rays_d = get_rays(
directions, c2w, keepdim=True, noise_scale=self.cfg.rays_noise_scale
)
rgb = self.all_images[idx][None]
depth = self.all_depths[idx][None]
mask = self.all_fg_masks[idx][None]
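        # With train_num_rays set, sample that many random pixels per image
        # instead of returning the full ray grid.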
if (
self.cfg.train_num_rays != -1
and self.cfg.train_num_rays < self.cfg.height * self.cfg.width
):
_, height, width, _ = rays_o.shape
x = torch.randint(
0, width, size=(self.cfg.train_num_rays,), device=rays_o.device
)
y = torch.randint(
0, height, size=(self.cfg.train_num_rays,), device=rays_o.device
)
rays_o = rays_o[:, y, x].unsqueeze(-2)
rays_d = rays_d[:, y, x].unsqueeze(-2)
directions = directions[:, y, x].unsqueeze(-2)
rgb = rgb[:, y, x].unsqueeze(-2)
mask = mask[:, y, x].unsqueeze(-2)
depth = depth[:, y, x].unsqueeze(-2)
# TODO: get projection matrix and mvp matrix
# proj_mtx = get_projection_matrix()
batch = {
"rays_o": rays_o,
"rays_d": rays_d,
"mvp_mtx": None,
"camera_positions": c2w[..., :3, -1],
"light_positions": light_positions,
"elevation": None,
"azimuth": None,
"camera_distances": None,
"rgb": rgb,
"depth": depth,
"mask": mask,
}
if self.cfg.use_random_camera:
batch["random_camera"] = self.random_pose_generator.collate(None)
# prepare batch data in system
# c2w = self.all_c2w[idx][None]
# batch = {
# 'index': torch.tensor([idx]),
# 'c2w': c2w,
# 'light_positions': c2w[..., :3, -1],
# 'H': self.h,
# 'W': self.w
# }
self.idx += 1
if self.idx == len(self.all_images):
self.idx = 0
self.image_perm = torch.randperm(len(self.all_images))
# self.idx = (self.idx + 1) % len(self.all_images)
return batch
@register("co3d-datamodule")
class Co3dDataModule(pl.LightningDataModule):
def __init__(self, cfg: Optional[Union[dict, DictConfig]] = None) -> None:
super().__init__()
self.cfg = parse_structured(Co3dDataModuleConfig, cfg)
def setup(self, stage=None):
if stage in [None, "fit"]:
self.train_dataset = Co3dIterableDataset(self.cfg, self.cfg.train_split)
if stage in [None, "fit", "validate"]:
self.val_dataset = Co3dDataset(self.cfg, self.cfg.val_split)
if stage in [None, "test", "predict"]:
self.test_dataset = Co3dDataset(self.cfg, self.cfg.test_split)
def prepare_data(self):
pass
def general_loader(self, dataset, batch_size, collate_fn=None) -> DataLoader:
sampler = None
return DataLoader(
dataset,
num_workers=0,
batch_size=batch_size,
# pin_memory=True,
collate_fn=collate_fn,
)
def train_dataloader(self):
return self.general_loader(
self.train_dataset, batch_size=1, collate_fn=self.train_dataset.collate
)
def val_dataloader(self):
return self.general_loader(self.val_dataset, batch_size=1)
def test_dataloader(self):
return self.general_loader(self.test_dataset, batch_size=1)
def predict_dataloader(self):
return self.general_loader(self.test_dataset, batch_size=1)