UniDepth icon indicating copy to clipboard operation
UniDepth copied to clipboard

Very bad quality compared to Metric3D

Open seamie6 opened this issue 5 months ago • 2 comments

I am running this on nuscenes with the following code adapted from your demo.py file:

import numpy as np
import torch
from PIL import Image

from unidepth.models import UniDepthV1, UniDepthV2
from unidepth.utils import colorize, image_grid

from nuscenes.nuscenes import NuScenes
import os


def demo(model):
    """Run depth inference on one image, save the depth map and display it.

    Reads the module-level names ``image_path`` (image file to load) and
    ``intrin`` (camera intrinsics passed to the model); both are assigned in
    the ``__main__`` block before this function is called.

    Writes ``test.png`` in the current working directory and opens the
    colourised depth map in the default image viewer.

    Args:
        model: Object exposing ``infer(rgb, intrinsics)`` whose result is
            indexable with ``"depth"`` and yields a torch tensor.
    """
    # Force three channels so the permute below always receives (H, W, 3),
    # even for grayscale or RGBA files.
    rgb = np.array(Image.open(image_path).convert("RGB"))
    rgb_torch = torch.from_numpy(rgb).permute(2, 0, 1)

    # predict
    predictions = model.infer(rgb_torch, intrin)

    # Keep the prediction as floating point. Casting it to uint8 here would
    # drop the fractional part and cannot represent values above 255, which
    # reduces the depth map to a handful of coarse integer levels.
    depth_pred = predictions["depth"].squeeze().cpu().numpy()

    # Store the raw depth losslessly enough for later inspection: a 16-bit
    # PNG holding depth * 256 (divide by 256 when reading it back).
    # NOTE(review): assumes depth is in metres, giving ~4 mm steps up to
    # ~256 m - confirm against the model documentation.
    depth_scaled = np.nan_to_num(depth_pred * 256.0)
    depth_png = np.clip(depth_scaled, 0, 65535).astype(np.uint16)
    Image.fromarray(depth_png).save('test.png')

    # Colourise the float depth so the colour map sees the full value range.
    depth_pred_col = colorize(depth_pred, vmin=0.01, vmax=100.0, cmap="magma_r")
    im = Image.fromarray(depth_pred_col)

    im.show()



if __name__ == "__main__":
    # --- dataset -----------------------------------------------------------
    dataroot = '../nuScenes/'
    nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=True)

    samples = nusc.sample
    cams = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT',
    ]

    # Take the first sample and look up its rear-camera record.
    first_sample = samples[0]
    cam = 'CAM_BACK'
    cam_record = nusc.get('sample_data', first_sample['data'][cam])

    # `image_path` and `intrin` are read as globals by demo().
    image_path = os.path.join(nusc.dataroot, cam_record['filename'])

    calibration = nusc.get(
        'calibrated_sensor', cam_record['calibrated_sensor_token']
    )
    intrin = torch.Tensor(calibration['camera_intrinsic'])

    # --- model -------------------------------------------------------------
    # Alternative checkpoint:
    #   UniDepthV1.from_pretrained("lpiccinelli/unidepth-v1-vitl14")
    name = "unidepth-v2-vitl14"
    model = UniDepthV2.from_pretrained(f"lpiccinelli/{name}")

    # Optional V2-only settings, left at their defaults here:
    #   model.resolution_level = 0
    #   model.interpolation_mode = "bilinear"

    # Use the GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model = model.to(device)

    demo(model)

which just runs the model on one back camera image. Here are the results:

METRIC3D GIANT MODEL: n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525

UNIDEPTH LARGE MODEL: test

Your paper claims to outperform Metric3D, but here it is performing quite badly: the result is extremely blurry and quite inaccurate. So I assume I am doing something wrong; could you point me in the right direction? Thanks

seamie6 avatar Sep 03 '24 10:09 seamie6