
DTU Training for NeRF

chaupham1709 opened this issue · 2 comments

Thanks for sharing this great work. I'm currently trying to reproduce your DTU results with the original NeRF (the "NeRF 10.2h" row in Tab. 1), but I cannot match them. Could you share the NeRF training code?

chaupham1709 · Mar 24 '22

I used the official repo and here is my data loader:

import os
import numpy as np
from PIL import Image

def load_dtu_data(basedir, downsample=1.0):


    def read_cam_file(filename, scale_factor, downsample):
        with open(filename) as f:
            lines = [line.rstrip() for line in f.readlines()]
        # extrinsics: line [1,5), 4x4 matrix
        extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ')
        extrinsics = extrinsics.reshape((4, 4))
        # intrinsics: line [7-10), 3x3 matrix
        intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ')
        intrinsics = intrinsics.reshape((3, 3))
        # depth_min & depth_interval: line 11
        depth_min = float(lines[11].split()[0]) * scale_factor
        depth_max = depth_min + float(lines[11].split()[1]) * 192 * scale_factor

        # scaling
        extrinsics[:3, 3] *= scale_factor
        intrinsics[0:2] *= downsample

        return intrinsics, extrinsics, [depth_min, depth_max]

    root_dir = os.path.dirname(basedir)
    scan = os.path.basename(basedir)

    assert int(640*downsample)%32 == 0, \
        f'image width is {int(640*downsample)}, it should be divisible by 32, you may need to modify the imgScale'
    img_wh = (int(640*downsample), int(512*downsample))
    print(f'==> image down scale: {downsample}')

    scale_factor = 1.0 / 200


    opencv2blender = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])

    # sub select training views from pairing file
    pairs = np.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs-np.npy', allow_pickle=True)[()]
    pair_train = np.arange(49).tolist()  # pairs['dtu_train']
    pair_test = pairs['dtu_test']
    img_idx = pair_train + pair_test

    near_fars = []
    poses = []
    all_intrinsic = []
    all_rgbs = []
    for idx in img_idx:
        proj_mat_filename = os.path.join(root_dir, f'Cameras/train/{idx:08d}_cam.txt')
        intrinsic, w2c, near_far = read_cam_file(proj_mat_filename, scale_factor, downsample)
        c2w = np.linalg.inv(w2c) @ opencv2blender
        poses += [c2w]

        image_path = os.path.join(root_dir, f'Rectified/{scan}_train/rect_{idx + 1:03d}_3_r5000.png')


        img = Image.open(image_path)
        img = img.resize(img_wh, Image.LANCZOS)
        img = (np.array(img) / 255.).astype(np.float32)   # (h, w, 3)
        all_rgbs += [img]

        # the intrinsics in the DTU cam files correspond to the 1/4-resolution depth maps,
        # so scale them back up to the 640x512 rectified images
        intrinsic[:2] *= 4
        all_intrinsic.append(intrinsic)
        near_fars.append(near_far)

    near_fars = np.stack(near_fars).astype('float32')
    poses = np.stack(poses).astype('float32')
    all_intrinsic = np.stack(all_intrinsic).astype('float32')
    all_rgbs = np.stack(all_rgbs, 0).astype('float32')
    i_split = [np.arange(len(pair_train)), np.arange(len(pair_train),len(pair_train)+len(pair_test))]

    return all_rgbs, poses, near_fars, all_intrinsic, i_split
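
For reference, a rough sketch of how the loader output could be wired into a per-scene NeRF setup; the scan path and the single-focal handling below are placeholders for whatever your NeRF fork expects, not part of the loader itself:

import numpy as np

# Hypothetical usage sketch: only the load_dtu_data call comes from the loader above.
# basedir points at one scan folder; its parent must contain Cameras/ and Rectified/.
images, poses, near_fars, intrinsics, i_split = load_dtu_data('/path/to/dtu/scan1', downsample=1.0)
i_train, i_test = i_split

H, W = images.shape[1:3]             # images: (N, H, W, 3)
focal = intrinsics[0][0, 0]          # vanilla NeRF assumes a single shared focal length
near = float(near_fars[:, 0].min())  # global near/far bounds over all views
far = float(near_fars[:, 1].max())

print(f'{len(i_train)} train / {len(i_test)} test views, '
      f'H={H}, W={W}, focal={focal:.2f}, near={near:.3f}, far={far:.3f}')

Note that vanilla NeRF uses one shared focal length while this loader keeps per-view intrinsics, so the ray generation may need a small adaptation.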

apchenstu · Mar 26 '22


Hello, I want to ask one question: why does the intrinsic need to be multiplied by 4?

smallpeachofchangan · Dec 13 '23