UniDepth
UniDepth copied to clipboard
Very bad quality compared to Metric3D
I am running this on nuScenes with the following code, adapted from your `demo.py` file:
import numpy as np
import torch
from PIL import Image
from unidepth.models import UniDepthV1, UniDepthV2
from unidepth.utils import colorize, image_grid
from nuscenes.nuscenes import NuScenes
import os
def demo(model):
    """Run ``model`` on a single image, then save and display its depth map.

    The image location and the camera intrinsics are read from the
    module-level names ``image_path`` and ``intrin``, which the
    ``__main__`` section of this script sets before calling ``demo``.

    Args:
        model: Object exposing ``infer(rgb, intrinsics)`` that returns a
            mapping containing a ``"depth"`` tensor.

    Side effects:
        Writes ``test.png`` (16-bit PNG storing ``depth * 256``) to the
        current directory and opens the colorized depth map in the
        default image viewer.
    """
    # Force three channels so the (H, W, C) -> (C, H, W) permute below is
    # valid even for grayscale or RGBA inputs.
    rgb = np.array(Image.open(image_path).convert("RGB"))
    rgb_torch = torch.from_numpy(rgb).permute(2, 0, 1)

    # predict
    predictions = model.infer(rgb_torch, intrin)

    # Keep the prediction in floating point. Casting it to uint8 (as the
    # previous version did) drops the fractional part and wraps every value
    # above 255, which makes the map look blocky and inaccurate.
    depth_pred = predictions["depth"].squeeze().cpu().numpy()

    # An 8-bit PNG cannot hold a usable depth range, so store a 16-bit image
    # with a fixed scale of 256 steps per depth unit (presumably metres --
    # confirm against the model's documentation). Values are clipped to the
    # uint16 range so they never wrap around.
    depth_png = np.clip(depth_pred * 256.0, 0, 65535).astype(np.uint16)
    depth_pred_PIL = Image.fromarray(depth_png)
    depth_pred_PIL.save('test.png')

    # Colorize the full-precision depth, not a quantized copy.
    depth_pred_col = colorize(depth_pred, vmin=0.01, vmax=100.0, cmap="magma_r")
    im = Image.fromarray(depth_pred_col)
    im.show()
if __name__ == "__main__":
    # Load the nuScenes mini split from the local checkout.
    dataroot = '../nuScenes/'
    nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=True)

    # Camera channel names, kept for reference; only `cam` below is used.
    cams = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT',
    ]

    # Take the first sample and look up its rear-camera frame.
    cam = 'CAM_BACK'
    first_sample = nusc.sample[0]
    cam_record = nusc.get('sample_data', first_sample['data'][cam])
    calibration = nusc.get(
        'calibrated_sensor', cam_record['calibrated_sensor_token']
    )

    # `demo` reads these two module-level names.
    image_path = os.path.join(nusc.dataroot, cam_record['filename'])
    intrin = torch.Tensor(calibration['camera_intrinsic'])

    # Pick the checkpoint to load.
    name = "unidepth-v2-vitl14"
    # model = UniDepthV1.from_pretrained("lpiccinelli/unidepth-v1-vitl14")
    model = UniDepthV2.from_pretrained(f"lpiccinelli/{name}")

    # set resolution level (only V2)
    # model.resolution_level = 0

    # set interpolation mode (only V2)
    # model.interpolation_mode = "bilinear"

    # Run on the GPU when one is available, otherwise on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = model.to(device)

    demo(model)
This just runs the model on a single back-camera image. Here are the results:
METRIC3D GIANT MODEL:
UNIDEPTH LARGE MODEL:
Your paper claims that UniDepth outperforms Metric3D, but here it performs quite badly: the result is extremely blurry and quite inaccurate. I assume I am doing something wrong, so could you point me in the right direction? Thanks.