mast3r copied to clipboard
Issue with reprojection when compared with DUST3R
Hi there,
For a downstream task I need to reproject images taken from multiple cameras into another camera, and your work is perfect for that! I have had success doing this with DUST3R; however, when using MAST3R there seem to be pixel-shifting problems.
Using this as a sanity check:
cam = 1
world2cam = np.linalg.inv(cams2world[cam])
pts = point_cloud_list[cam] # HW x 3
# Transform point cloud to camera coordinate system
pts_homo = np.hstack([pts, np.ones((pts.shape[0], 1))])
pts_cam = world2cam @ pts_homo.T # 4xN
x = pts_cam[0,:]*focals[cam] / pts_cam[2,:]
y = pts_cam[1,:]*focals[cam] / pts_cam[2,:]
fig = plt.figure()
ax = fig.add_subplot()
The plots for DUST3R look like this, with (0, 0) at the centre:
However, for MAST3R we get this, where there is a large shift along both axes, implying a possible error in the camera poses:
I was wondering whether it is known what the reason for this could be, and whether a fix is possible, as I would love to migrate from DUST3R over to MAST3R.
MAST3R Version: Commit hash b1b5578
Files used:
Many thanks, Finlay
Below is the code used to load the files, as well as the code used to generate them:
prefix = "DUST3R" if USE_DUST3R else "MAST3R"
focals = np.load(f"data/{prefix}/focals.npy")
cams2world = np.load(f"data/{prefix}/cams2world.npy")
point_cloud_list = np.load(f"data/{prefix}/points_cloud_list.npy")
rgbimg = np.load(f"data/{prefix}/rgbimg.npy")
focals = focals[:, 0]
point_cloud_list = [p.reshape(-1, 3) for p in point_cloud_list]
weights_path = "dust3r/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth"
model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
schedule = 'cosine'
lr = 0.01
niter = 300
min_conf_thr = 8.0 # Defaults at 3.0
pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
output = inference(pairs, model, device, batch_size=1)
scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer,
loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)
imgs = np.stack(scene.imgs)
focals = scene.get_focals().detach().cpu().numpy()
poses = scene.get_im_poses().detach().cpu().numpy()
pts3d = [pt.detach().cpu().numpy() for pt in scene.get_pts3d()]
conf_masks = [conf.detach().cpu().numpy() for conf in scene.get_masks()]
optim_level = "refine"
lr1, lr2 = 0.07, 0.014
niter1, niter2 = 500, 200
min_conf_thr = 1.5
matching_conf_thr = 5.0
clean_depth = True
scenegraph_type = 'complete'
winsize = 1
win_cyclic = False
refid = 0
TSDF_thresh = 0.0
shared_intrinsics = False
if optim_level == 'coarse':
niter2 = 0
weights_path = "MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth"
model = AsymmetricMASt3R.from_pretrained(weights_path).to(device)
scene_graph_params = [scenegraph_type]
if scenegraph_type in ["swin", "logwin"]:
elif scenegraph_type == "oneref":
if scenegraph_type in ["swin", "logwin"] and not win_cyclic:
scene_graph = '-'.join(scene_graph_params)
pairs = make_pairs(images, scene_graph=scene_graph, prefilter=None, symmetrize=True)
scene = sparse_global_alignment(filelist, pairs, "/tmp/mast3r",
model, lr1=lr1, niter1=niter1, lr2=lr2, niter2=niter2, device=device,
opt_depth='depth' in optim_level, shared_intrinsics=shared_intrinsics,
# 3D pointcloud from depthmap, poses and intrinsics
if TSDF_thresh > 0:
tsdf = TSDFPostProcess(scene, TSDF_thresh=TSDF_thresh)
pts3d, _, confs = to_numpy(tsdf.get_dense_pts3d(clean_depth=clean_depth))
pts3d, _, confs = to_numpy(scene.get_dense_pts3d(clean_depth=clean_depth))
conf_masks = to_numpy([c > min_conf_thr for c in confs])
focals = scene.get_focals().cpu().numpy()
poses = scene.get_im_poses().cpu().numpy()
imgs = scene.imgs