Depth-Anything-ONNX
Do you have an example depth_to_pointcloud.py with ONNX?
Do you have an example depth_to_pointcloud.py (Depth Anything V2) that works with the ONNX models?
Thank you.
Hi @Siwakonrome, thank you for your interest in Depth-Anything-ONNX.
If I understand the script correctly, inference is the same up to this line:
https://github.com/DepthAnything/Depth-Anything-V2/blob/31dc97708961675ce6b3a8d8ffa729170a4aa273/metric_depth/depth_to_pointcloud.py#L93
pred = depth_anything.infer_image(image, height)
In the ONNX inference, the above line corresponds to:
https://github.com/fabio-sim/Depth-Anything-ONNX/blob/3128cb99056785cc843ad6deda2f53f2e2ff4272/dynamo.py#L238
This means that you only need to adapt the following lines:
https://github.com/DepthAnything/Depth-Anything-V2/blob/31dc97708961675ce6b3a8d8ffa729170a4aa273/metric_depth/depth_to_pointcloud.py#L96-L110
# Resize depth prediction to match the original image size
resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST)
# Generate mesh grid and calculate point cloud coordinates
x, y = np.meshgrid(np.arange(width), np.arange(height))
x = (x - width / 2) / args.focal_length_x
y = (y - height / 2) / args.focal_length_y
z = np.array(resized_pred)
points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
colors = np.array(color_image).reshape(-1, 3) / 255.0
# Create the point cloud and save it to the output directory
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
pcd.colors = o3d.utility.Vector3dVector(colors)
o3d.io.write_point_cloud(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd)
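Putting the two together, a minimal sketch of an ONNX-based depth_to_pointcloud (the function name, the fixed 518x518 network input, and the single (batch, H, W) output are assumptions; adjust them to your export):

import cv2
import numpy as np
import onnxruntime as ort
import open3d as o3d
from PIL import Image

def onnx_depth_to_pointcloud(image_path, model_path, focal_length_x, focal_length_y):
    # Preprocess: RGB, resize to the network input, ImageNet normalization.
    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    height, width = image.shape[:2]
    inp = cv2.resize(image, (518, 518), interpolation=cv2.INTER_CUBIC) / 255.0
    inp = (inp - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    inp = inp.transpose(2, 0, 1)[None].astype(np.float32)
    # ONNX inference replaces depth_anything.infer_image(image, height).
    session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    (pred,) = session.run(None, {session.get_inputs()[0].name: inp})
    pred = pred[0]  # drop the batch dimension -> (518, 518) depth map
    # The rest is unchanged from depth_to_pointcloud.py.
    resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST)
    x, y = np.meshgrid(np.arange(width), np.arange(height))
    x = (x - width / 2) / focal_length_x
    y = (y - height / 2) / focal_length_y
    z = np.array(resized_pred)
    points = np.stack((x * z, y * z, z), axis=-1).reshape(-1, 3)
    colors = image.reshape(-1, 3) / 255.0
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    return pcd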
@fabio-sim Thank you for the information.
I tried to export an ONNX model from metric_depth (using the metric checkpoint depth_anything_v2_vits.pth), analogous to depth_anything_v2_vits_dynamic.onnx.
Export code:
import torch
from metric_depth.depth_anything_v2.dpt import DepthAnythingV2

model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}

opset = 18
batch_size = 1
encoder_name = 'vits'
output_onnx = '/content/test0.onnx'
model_path = '/content/depth_anything_v2_vits.pth'

def export():
    """Export Depth-Anything V2 (metric) using TorchDynamo."""
    model = DepthAnythingV2(**{**model_configs[encoder_name], 'max_depth': 20})
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()  # switch to inference mode before tracing
    if opset == 18:
        onnx_program = torch.onnx.dynamo_export(
            model, torch.randn(batch_size, 3, 518, 518)
        )
        onnx_program.save(str(output_onnx))

export()
It works when I resize the input image (image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)). But when I remove that resize line, it returns:
RuntimeError: Error in execution: Got invalid dimensions for input: l_x_ for the following indices
index: 2 Got: 1544 Expected: 518
index: 3 Got: 2048 Expected: 518
Please fix either the inputs/outputs or the model.
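This error is consistent with the export above: torch.onnx.dynamo_export traces the model with the example tensor it is given, so without explicitly requesting dynamic shapes the exported graph is fixed at 1x3x518x518. A minimal check (not part of the original script; the path is the one from the export snippet) that confirms which axes are static:

import onnxruntime as ort

session = ort.InferenceSession('/content/test0.onnx', providers=['CPUExecutionProvider'])
inp = session.get_inputs()[0]
# Static axes print as integers, e.g. [1, 3, 518, 518];
# dynamic axes would appear as symbolic names instead.
print(inp.name, inp.shape)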
Inference code:
import cv2
from PIL import Image
import onnxruntime as ort
import numpy as np
import open3d as o3d
import time

'''
Basler 2K camera intrinsics
'''
ppx = 8.709002501455901 * 10**2  # Principal point x in pixels.
ppy = 5.878986862144450 * 10**2  # Principal point y in pixels.
focal_length_x_pixel = 1.017288703871180 * 10**4  # Focal length x in pixels.
focal_length_y_pixel = 1.018932963024356 * 10**4  # Focal length y in pixels.

def infer():
    """Depth-Anything V2 inference using ONNXRuntime. No dependency on PyTorch."""
    image_path = r'C:\FiboWork\bgc_glass_gob_inspection\depth_anything_v2\images\29.png'
    model_path = r'C:\FiboWork\bgc_glass_gob_inspection\depth_anything_v2\models\test.onnx'
    width, height = 518, 518
    image = cv2.imread(str(image_path))
    color_image = Image.open(image_path).convert('RGB')
    h, w = image.shape[:2]
    print(h, w)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)  # Fixed resize; I want a dynamic input shape instead
    image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    image = image.transpose(2, 0, 1)[None].astype("float32")

    # Inference
    sess_options = ort.SessionOptions()
    sess_options.enable_profiling = False
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    session = ort.InferenceSession(
        model_path, sess_options=sess_options, providers=providers
    )
    binding = session.io_binding()
    ort_input = session.get_inputs()[0].name
    binding.bind_cpu_input(ort_input, image)
    ort_output = session.get_outputs()[0].name
    binding.bind_output(ort_output, "cuda")
    # Actual inference happens here.
    session.run_with_iobinding(binding)

    t0 = time.time()
    pred = binding.get_outputs()[0].numpy()[0]
    resized_pred = Image.fromarray(pred).resize((w, h), Image.NEAREST)
    print(f'Output retrieval + resize: {time.time() - t0}')

    '''
    Post Processing
    Generate mesh grid and calculate point cloud coordinates
    '''
    t0 = time.time()
    x, y = np.meshgrid(np.arange(w), np.arange(h))
    # Pinhole back-projection: use the calibrated principal point (ppx, ppy),
    # not the center of the 518x518 network input.
    x = (x - ppx) / focal_length_x_pixel
    y = (y - ppy) / focal_length_y_pixel
    z = np.array(resized_pred)  # * 59.717
    points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
    print(f'Post Processing: {time.time() - t0}')

    '''
    Create the point cloud and save it to the output directory
    '''
    colors = np.array(color_image).reshape(-1, 3) / 255.0
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    bounding_box = o3d.geometry.AxisAlignedBoundingBox(
        min_bound=np.array([-200.0, -200.0, 1160.0]),
        max_bound=np.array([200.0, 200.0, 1500.0]),
    )
    cropped_pcd = pcd.crop(bounding_box)
    obb = cropped_pcd.get_oriented_bounding_box()
    obb.color = (1, 0, 0)
    dimension = obb.extent
    dimension_text = f"Dimensions (L x R) in mm: {dimension[0]:.2f} x {dimension[1]:.2f}"
    print(dimension_text)
    o3d.visualization.draw_geometries([cropped_pcd, obb])

infer()
How do I export an ONNX model that accepts a dynamic input image shape? What did I do wrong? Thank you.
For dynamic shapes, you can try this command:
python dynamo.py export -b 0 -h 0 -w 0 --opset 17