import os
import cv2
import torch
import numpy as np
from glob import glob
from torch.utils import data
from src.utils import load_rgb, load_mask, get_camera_params
from pytorch3d.renderer import PerspectiveCameras
from skimage import img_as_float32
# The helpers below are used by the differentiable renderer.
# Taken from: (source reference missing here -- TODO restore the original attribution)
def load_rgb(path):
    """Read the image at ``path`` and return it converted by ``img_as_float32``.

    No rescaling to [-1, 1] and no channel transpose is applied; those steps
    were commented out in this loader, so the array keeps whatever layout and
    value range ``img_as_float32`` yields.

    Args:
        path: Location of the image, passed straight to ``imageio.imread``.

    Returns:
        The converted image array.
    """
    return img_as_float32(imageio.imread(path))
def load_mask(path):
    """Read the image at ``path`` as grayscale and threshold it into a mask.

    Args:
        path: Location of the mask image, passed to ``imageio.imread`` with
            ``as_gray=True``.

    Returns:
        The element-wise result of ``gray > 127.5``.
    """
    # NOTE(review): the 127.5 cut-off presumes the grayscale values are still
    # on a 0-255 scale after img_as_float32 -- confirm for the image types used.
    gray = img_as_float32(imageio.imread(path, as_gray=True))
    return gray > 127.5
def get_camera_params(uv, pose, intrinsics):
    """Compute unit ray directions through the given pixels and the camera location.

    Every pixel is lifted to a camera-space point at depth 1 (via ``lift``),
    transformed by the 4x4 pose, and the normalized offset from the camera
    location is returned as the ray direction.

    Args:
        uv: Tensor of rank 3 indexed as ``uv[batch, sample, c]`` where
            ``c == 0`` is the x pixel coordinate and ``c == 1`` the y one.
        pose: Either a ``(batch, 7)`` tensor holding a quaternion in columns
            0-3 and the camera location in columns 4-6, or a batch of pose
            matrices whose ``[:, :3, 3]`` column is the camera location.
        intrinsics: Batched intrinsics matrices, forwarded to ``lift``.

    Returns:
        A tuple ``(ray_dirs, cam_loc)`` where ``ray_dirs`` has shape
        ``(batch, num_samples, 3)`` and is normalized along the last axis, and
        ``cam_loc`` has shape ``(batch, 3)``.
    """
    if pose.shape[1] == 7:  # quaternion vector representation
        cam_loc = pose[:, 4:]
        rotation = quat_to_rot(pose[:, :4])
        # Assemble the 4x4 pose on the same device as the input instead of
        # forcing CUDA, so it can be multiplied with the lifted points below.
        p = torch.eye(4, device=pose.device).repeat(pose.shape[0], 1, 1).float()
        p[:, :3, :3] = rotation
        p[:, :3, 3] = cam_loc
    else:  # pose matrix representation
        cam_loc = pose[:, :3, 3]
        p = pose

    batch_size, num_samples, _ = uv.shape

    # Unit depth for every pixel, created next to ``uv`` to avoid mixing devices.
    depth = torch.ones((batch_size, num_samples), device=uv.device)
    x_cam = uv[:, :, 0].view(batch_size, -1)
    y_cam = uv[:, :, 1].view(batch_size, -1)
    z_cam = depth.view(batch_size, -1)

    pixel_points_cam = lift(x_cam, y_cam, z_cam, intrinsics=intrinsics)

    # permute to (batch, 4, num_samples) for the batched matrix product
    pixel_points_cam = pixel_points_cam.permute(0, 2, 1)

    world_coords = torch.bmm(p, pixel_points_cam).permute(0, 2, 1)[:, :, :3]
    ray_dirs = world_coords - cam_loc[:, None, :]
    ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)

    return ray_dirs, cam_loc
def quat_to_rot(q):
    """Convert a batch of quaternions into 3x3 rotation matrices.

    Args:
        q: Tensor of shape ``(batch, 4)`` ordered ``(real, i, j, k)``. It is
            normalized along dim 1 before conversion, so it need not be unit
            length.

    Returns:
        Tensor of shape ``(batch, 3, 3)`` allocated on the same device as ``q``.
    """
    batch_size, _ = q.shape
    q = torch.nn.functional.normalize(q, dim=1)

    # Allocate next to the input rather than unconditionally on CUDA.
    R = torch.ones((batch_size, 3, 3), device=q.device)

    # The real component was used below but never defined (NameError).
    qr = q[:, 0]
    qi = q[:, 1]
    qj = q[:, 2]
    qk = q[:, 3]

    R[:, 0, 0] = 1 - 2 * (qj ** 2 + qk ** 2)
    R[:, 0, 1] = 2 * (qj * qi - qk * qr)
    R[:, 0, 2] = 2 * (qi * qk + qr * qj)
    R[:, 1, 0] = 2 * (qj * qi + qk * qr)
    R[:, 1, 1] = 1 - 2 * (qi ** 2 + qk ** 2)
    R[:, 1, 2] = 2 * (qj * qk - qi * qr)
    R[:, 2, 0] = 2 * (qk * qi - qj * qr)
    R[:, 2, 1] = 2 * (qj * qk + qi * qr)
    R[:, 2, 2] = 1 - 2 * (qi ** 2 + qj ** 2)
    return R
def lift(x, y, z, intrinsics):
    """Back-project pixel coordinates at depth ``z`` into homogeneous camera points.

    Args:
        x: Pixel x coordinates, shape ``(batch, n)``.
        y: Pixel y coordinates, same shape as ``x``.
        z: Depth per pixel, same shape as ``x``.
        intrinsics: Batched intrinsics; entries ``[0, 0]``, ``[1, 1]``,
            ``[0, 2]``, ``[1, 2]`` and ``[0, 1]`` of each matrix are read as
            fx, fy, cx, cy and skew.

    Returns:
        Tensor with a trailing axis of size 4 holding
        ``(x_lift, y_lift, z, 1)`` for every pixel.
    """
    # Per-batch intrinsics as column vectors so they broadcast over the pixels.
    focal_x = intrinsics[:, 0, 0].unsqueeze(-1)
    focal_y = intrinsics[:, 1, 1].unsqueeze(-1)
    center_x = intrinsics[:, 0, 2].unsqueeze(-1)
    center_y = intrinsics[:, 1, 2].unsqueeze(-1)
    skew = intrinsics[:, 0, 1].unsqueeze(-1)

    x_numerator = x - center_x + center_y * skew / focal_y - skew * y / focal_y
    x_lift = x_numerator / focal_x * z
    y_lift = (y - center_y) / focal_y * z

    # homogeneous coordinate appended last
    components = (x_lift, y_lift, z, torch.ones_like(z))
    return torch.stack(components, dim=-1)
class PixelNeRFDTUDataset(data.Dataset):
Processed DTU from pixelNeRF
# Loads the images, masks and cameras of one scan folder and derives per-view
# camera parameters and rays from the projection matrices in cameras.npz.
# NOTE(review): this method is visibly truncated in this copy of the file --
# the parameter list, the closing bracket of the rgb_paths list, the per-view
# appends into the all_* lists, and the target of the final dict are missing.
# Restore those parts from the original before relying on this code.
def __init__(self,
# Scan folder is <data_dir>/scan<scan_id>.
data_dir = os.path.join(data_dir, "scan{}".format(scan_id))
# Keep only the .jpg / .png files found under <scan>/image.
rgb_paths = [
x for x in glob(os.path.join(data_dir, "image", "*"))
if (x.endswith(".jpg") or x.endswith(".png"))
rgb_paths = sorted(rgb_paths)
# Masks are the .png files under <scan>/mask; when there are none, use one
# None placeholder per image.
mask_paths = sorted(glob(os.path.join(data_dir, "mask", "*.png")))
if len(mask_paths) == 0:
mask_paths = [None] * len(rgb_paths)
# Camera index used for each image: 0 .. len(rgb_paths) - 1.
sel_indices = np.arange(len(rgb_paths))
# cameras.npz provides the "world_mat_<i>" and "scale_mat_<i>" arrays read below.
cam_path = os.path.join(data_dir, "cameras.npz")
all_cam = np.load(cam_path)
all_imgs = []
all_poses = []
all_masks = []
all_rays = []
all_light_pose = []
all_K = []
all_R = []
all_T = []
# Images and masks are paired by their sorted order.
for idx, (rgb_path, mask_path) in enumerate(zip(rgb_paths, mask_paths)):
i = sel_indices[idx]
rgb = load_rgb(rgb_path)
# NOTE(review): mask_path may be the None placeholder set above -- confirm
# load_mask handles that case.
mask = load_mask(mask_path)
# Zero out every pixel outside the mask.
rgb[~mask] = 0.
rgb = torch.from_numpy(rgb).float().to(device)
mask = torch.from_numpy(mask).float().to(device)
# NOTE(review): x_scale, y_scale and xy_delta are not used in the visible code.
x_scale = y_scale = 1.0
xy_delta = 0.0
# NOTE(review): these two assignments to P are overwritten below before use.
P = all_cam["world_mat_" + str(i)]
P = P[:3]
# scale the original shape to really [-0.9, 0.9]
if fixed_scale!=0.:
scale_mat_new = np.eye(4, 4)
scale_mat_new[:3, :3] *= fixed_scale # scale to [-0.9, 0.9]
P = all_cam["world_mat_" + str(i)] @ all_cam["scale_mat_" + str(i)] @ scale_mat_new
# NOTE(review): as written, the next line overwrites the fixed_scale variant
# unconditionally -- an "else:" may have been lost here; confirm.
P = all_cam["world_mat_" + str(i)] @ all_cam["scale_mat_" + str(i)]
P = P[:3, :4]
# Decompose the 3x4 projection; t is dehomogenized below as t[:3] / t[3].
K, R, t = cv2.decomposeProjectionMatrix(P)[:3]
# Normalize the intrinsics so that K[2, 2] == 1.
K = K / K[2, 2]
fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
# Transposed rotation and translation -R @ (t[:3] / t[3]) handed to
# PerspectiveCameras below.
RR = torch.from_numpy(R).permute(1, 0).unsqueeze(0)
tt = torch.from_numpy(-R@(t[:3] / t[3])).permute(1, 0)
focal = torch.tensor((fx, fy), dtype=torch.float32).unsqueeze(0)
pc = torch.tensor((cx, cy), dtype=torch.float32).unsqueeze(0)
# Taken from the image's second and first axes -- presumably (width, height).
im_size = (rgb.shape[1], rgb.shape[0])
# check for how to transform from screen to NDC
# Focal length and principal point are rescaled by 2 / (s - 1), where s is
# the smaller of the two image sizes; the principal point is also centred
# and negated.
s = min(im_size)
focal[:, 0] = focal[:, 0] * 2 / (s-1)
focal[:, 1] = focal[:, 1] * 2 /(s-1)
pc[:, 0] = -(pc[:, 0] - (im_size[0]-1)/2) * 2 / (s-1)
pc[:, 1] = -(pc[:, 1] - (im_size[1]-1)/2) * 2 / (s-1)
camera = PerspectiveCameras(focal_length=-focal, principal_point=pc,
device=device, R=RR, T=tt)
# calculate camera rays
# NOTE(review): uv_creation is not defined in the visible part of the file.
uv = uv_creation(im_size)[None].float()
# 4x4 pose passed to get_camera_params, which reads its [:3, 3] column as the
# camera location.
pose = np.eye(4, dtype=np.float32)
pose[:3, :3] = R.transpose()
pose[:3,3] = (t[:3] / t[3])[:,0]
pose = torch.from_numpy(pose)[None].float()
intrinsics = np.eye(4)
intrinsics[:3, :3] = K
intrinsics[0, 1] = 0. #! remove skew for now
intrinsics = torch.from_numpy(intrinsics)[None].float()
rays, _ = get_camera_params(uv, pose, intrinsics)
# NOTE(review): incomplete statement -- the right-hand side is missing.
rays =
# only for neural renderer
all_imgs = torch.stack(all_imgs)
all_masks = torch.stack(all_masks)
all_rays = torch.stack(all_rays)
all_light_pose = torch.stack(all_light_pose).squeeze()
# only for neural renderer
all_K = torch.stack(all_K).float()
all_R = torch.stack(all_R).float()
all_T = torch.stack(all_T).permute(0, 2, 1).float()
# NOTE(review): malformed line -- a uv assignment and the start of a dict
# literal appear to have been merged; the dict's assignment target and its
# closing brace are missing.
uv = uv_creation((all_imgs.size(2), all_imgs.size(1))) = {'rgbs': all_imgs,
'masks': all_masks,
'poses': all_poses,
'rays': all_rays,
'uv': uv,
'light_pose': all_light_pose, # for rendering lights
'K': all_K,
'R': all_R,
'T': all_T,
# Dataset length is fixed at 1, independent of how many views were loaded.
# NOTE(review): presumably __getitem__ serves the whole scan as a single
# item -- confirm against its implementation.
def __len__(self):
return 1
def __getitem__(self, idx):