mvsnerf
mvsnerf copied to clipboard
DTU Training for NeRF
Thanks for sharing this great work. I'm currently trying to reproduce your DTU training results with the original NeRF (the "NeRF 10.2h" result in Tab. 1), but I cannot match them. Could you share the NeRF training code?
I used the official repo and here is my data loader:
import os
import numpy as np
from PIL import Image
def load_dtu_data(basedir, downsample=1.0,
                  pairs_file='/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs-np.npy'):
    """Load one DTU scan for NeRF training: images, poses, intrinsics, depth bounds.

    Parameters
    ----------
    basedir : str
        Path to a single scan directory, e.g. ``<root>/scan1``.  Its parent
        directory must contain ``Cameras/train`` and ``Rectified``.
    downsample : float
        Image scale factor.  The resulting width ``int(640 * downsample)``
        must be divisible by 32 (asserted below).
    pairs_file : str
        Path to the ``pairs-np.npy`` view-split file.  Defaults to the
        previously hard-coded location so existing callers are unaffected.

    Returns
    -------
    tuple
        ``(all_rgbs, poses, near_fars, all_intrinsic, i_split)`` where
        ``all_rgbs`` is ``(N, h, w, 3)`` float32 in [0, 1], ``poses`` is
        ``(N, 4, 4)`` camera-to-world matrices in the Blender/NeRF
        convention, ``near_fars`` is ``(N, 2)``, ``all_intrinsic`` is
        ``(N, 3, 3)``, and ``i_split`` holds the train / test index arrays.
    """

    def read_cam_file(filename, scale_factor, downsample):
        """Parse an MVSNet-style cam.txt: extrinsics, intrinsics, depth range."""
        with open(filename) as f:
            lines = [line.rstrip() for line in f.readlines()]
        # extrinsics: lines [1, 5), 4x4 world-to-camera matrix
        extrinsics = np.array(' '.join(lines[1:5]).split(), dtype=np.float32)
        extrinsics = extrinsics.reshape((4, 4))
        # intrinsics: lines [7, 10), 3x3 matrix
        intrinsics = np.array(' '.join(lines[7:10]).split(), dtype=np.float32)
        intrinsics = intrinsics.reshape((3, 3))
        # line 11: depth_min and depth interval; 192 is the MVSNet number of
        # depth hypotheses used to derive depth_max
        tokens = lines[11].split()
        depth_min = float(tokens[0]) * scale_factor
        depth_max = depth_min + float(tokens[1]) * 192 * scale_factor
        # rescale translation into the shrunken world; scale intrinsics to
        # the requested image resolution
        extrinsics[:3, 3] *= scale_factor
        intrinsics[0:2] *= downsample
        return intrinsics, extrinsics, [depth_min, depth_max]

    root_dir = os.path.dirname(basedir)
    scan = os.path.basename(basedir)
    assert int(640*downsample)%32 == 0, \
        f'image width is {int(640*downsample)}, it should be divisible by 32, you may need to modify the imgScale'
    img_wh = (int(640 * downsample), int(512 * downsample))
    print(f'==> image down scale: {downsample}')

    # DTU depths are in mm; 1/200 brings the near/far range to a few units,
    # which NeRF's sampling expects
    scale_factor = 1.0 / 200
    # flip the y and z axes: OpenCV camera convention -> Blender/NeRF convention
    opencv2blender = np.array([[1, 0, 0, 0],
                               [0, -1, 0, 0],
                               [0, 0, -1, 0],
                               [0, 0, 0, 1]])

    # View split: all 49 DTU views for training (the pairing file's
    # 'dtu_train' subset is deliberately overridden here) plus its test views.
    pairs = np.load(pairs_file, allow_pickle=True)[()]
    pair_train = np.arange(49).tolist()
    pair_test = pairs['dtu_test']
    img_idx = pair_train + pair_test

    near_fars, poses, all_intrinsic, all_rgbs = [], [], [], []
    for idx in img_idx:
        proj_mat_filename = os.path.join(root_dir, f'Cameras/train/{idx:08d}_cam.txt')
        intrinsic, w2c, near_far = read_cam_file(proj_mat_filename, scale_factor, downsample)
        c2w = np.linalg.inv(w2c) @ opencv2blender
        poses += [c2w]

        image_path = os.path.join(root_dir, f'Rectified/{scan}_train/rect_{idx + 1:03d}_3_r5000.png')
        img = Image.open(image_path)
        img = img.resize(img_wh, Image.LANCZOS)
        img = (np.array(img) / 255.).astype(np.float32)  # (h, w, 3), values in [0, 1]
        all_rgbs += [img]

        # NOTE(review): cam.txt intrinsics appear to be given at 1/4 of the
        # 640x512 image resolution (i.e. 160x128), hence the factor of 4 on
        # focal length and principal point — confirm against the dataset docs.
        intrinsic[:2] *= 4
        all_intrinsic.append(intrinsic)
        near_fars.append(near_far)

    near_fars = np.stack(near_fars).astype('float32')
    poses = np.stack(poses).astype('float32')
    all_intrinsic = np.stack(all_intrinsic).astype('float32')
    all_rgbs = np.stack(all_rgbs, 0).astype('float32')
    i_split = [np.arange(len(pair_train)),
               np.arange(len(pair_train), len(pair_train) + len(pair_test))]
    return all_rgbs, poses, near_fars, all_intrinsic, i_split
I used the official repo and here is my data loader:
import os

import numpy as np
from PIL import Image


def load_dtu_data(basedir, downsample=1.0):
    """Load a DTU scan's images, camera poses, intrinsics and depth bounds.

    Returns ``(all_rgbs, poses, near_fars, all_intrinsic, i_split)``, where
    ``i_split`` is the pair of train / test index arrays.
    """

    def read_cam_file(filename, scale_factor, downsample):
        # Parse one MVSNet-style cam.txt file.
        with open(filename) as fh:
            lines = [ln.rstrip() for ln in fh.readlines()]
        # 4x4 world-to-camera matrix on lines [1, 5)
        extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ')
        extrinsics = extrinsics.reshape((4, 4))
        # 3x3 intrinsics on lines [7, 10)
        intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ')
        intrinsics = intrinsics.reshape((3, 3))
        # line 11 holds depth_min and the depth interval (x192 hypotheses)
        tokens = lines[11].split()
        depth_min = float(tokens[0]) * scale_factor
        depth_max = depth_min + float(tokens[1]) * 192 * scale_factor
        extrinsics[:3, 3] *= scale_factor
        intrinsics[0:2] *= downsample
        return intrinsics, extrinsics, [depth_min, depth_max]

    root_dir = os.path.dirname(basedir)
    scan = os.path.basename(basedir)
    assert int(640*downsample)%32 == 0, \
        f'image width is {int(640*downsample)}, it should be divisible by 32, you may need to modify the imgScale'
    img_wh = (int(640 * downsample), int(512 * downsample))
    print(f'==> image down scale: {downsample}')

    scale_factor = 1.0 / 200
    opencv2blender = np.array([[1, 0, 0, 0],
                               [0, -1, 0, 0],
                               [0, 0, -1, 0],
                               [0, 0, 0, 1]])

    # view split from the pairing file (training views overridden to all 49)
    pairs = np.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs-np.npy',
                    allow_pickle=True)[()]
    pair_train = np.arange(49).tolist()
    pair_test = pairs['dtu_test']

    near_fars, poses, all_intrinsic, all_rgbs = [], [], [], []
    for idx in pair_train + pair_test:
        cam_path = os.path.join(root_dir, f'Cameras/train/{idx:08d}_cam.txt')
        intrinsic, w2c, near_far = read_cam_file(cam_path, scale_factor, downsample)
        poses.append(np.linalg.inv(w2c) @ opencv2blender)

        image_path = os.path.join(root_dir, f'Rectified/{scan}_train/rect_{idx + 1:03d}_3_r5000.png')
        rgb = Image.open(image_path).resize(img_wh, Image.LANCZOS)
        all_rgbs.append((np.array(rgb) / 255.).astype(np.float32))

        intrinsic[:2] *= 4  # cam.txt intrinsics are at a quarter of the image scale
        all_intrinsic.append(intrinsic)
        near_fars.append(near_far)

    n_train, n_test = len(pair_train), len(pair_test)
    i_split = [np.arange(n_train), np.arange(n_train, n_train + n_test)]
    all_rgbs = np.stack(all_rgbs, 0).astype('float32')
    poses = np.stack(poses).astype('float32')
    near_fars = np.stack(near_fars).astype('float32')
    all_intrinsic = np.stack(all_intrinsic).astype('float32')
    return all_rgbs, poses, near_fars, all_intrinsic, i_split
Hello, I want to ask one question: why does the intrinsic matrix need to be multiplied by 4?