OpenSfM
OpenSfM copied to clipboard
Deriving an XYZ undistorted array from a depth map
Question: Is there a way to get the XYZ values as additional bands when computing the depth map?
I understand that a depth map generally represents the Y direction for each pixel in an image, which, after searching through the code, seems to be the only array/dimension returned when computing the various depth maps (raw and clean) from the C library — apart from the pruned depth map, which I ask about below. However, I would like to know what would be the best way to get the X and Z dimensions in addition to the Y dimension.
Is there any documentation that explains what's being done to produce the raw, clean, and pruned depth maps?
Option 1: Using Similar triangles
The method I'm currently using makes use of similar triangles outlined in the image below to calculate the X and Z dimensions using the Y and focal ratio.
The code I'm using:
import os
import numpy as np
import matplotlib.pyplot as plt
from opensfm.dataset import UndistortedDataSet, DataSet
def generate_image_coordinates(image_shape):
    """Return the index coordinates of every cell in an array of the given shape.

    :param tuple[int] image_shape: The size of each axis in numpy order,
        e.g. (rows, cols) for a 2D image.
    :return: An (N, k) integer array where N is the product of the axis
        sizes and k = len(image_shape); row i holds the multi-index of
        cell i in C (row-major) order.
    """
    axes = [np.arange(size) for size in image_shape]
    grids = np.meshgrid(*axes, indexing="ij")
    return np.stack([grid.ravel() for grid in grids], axis=1)
def convert_depth_map_to_xyz_array(undistorted_dataset, image_name, depth_map_type="clean", add_normals=False, add_scores=False):
    """Convert a per-pixel depth map into a multi-band XYZ array.

    :param undistorted_dataset: An OpenSfM ``UndistortedDataSet`` holding the
        undistorted reconstruction and the computed depth maps.
    :param str image_name: Name of the shot whose depth map is converted.
    :param str depth_map_type: Which depth map to load, "raw" or "clean"
        ("pruned" is not supported).
    :param bool add_normals: Append the plane/normal band(s) to the output.
    :param bool add_scores: Append the score band to the output.
    :return: An (H, W, d) array whose first three bands are X, Y (the depth
        itself), and Z, with optional normal and score bands appended.
    :raises KeyError: If no reconstruction contains ``image_name``.
    :raises NotImplementedError: For unsupported ``depth_map_type`` values.
    """
    reconstructions = undistorted_dataset.load_undistorted_reconstruction()
    # Find the focal ratio of the camera that took this shot.
    focal_ratio = None
    for reconstruction in reconstructions:
        if image_name in reconstruction.shots:
            shot = reconstruction.shots[image_name]
            focal_ratio = reconstruction.cameras[shot.camera.id].focal
            # Bug fix: the original set a 'flag' that was never flipped to
            # True, so the outer loop never broke after the camera was found.
            break
    if focal_ratio is None:
        raise KeyError(f"The camera information could not be found for image {image_name}")
    # Load the requested depth map bands.
    if depth_map_type == "raw":
        depth, plane, score, nghbr, nghbrs = undistorted_dataset.load_raw_depthmap(image_name)
    elif depth_map_type == "clean":
        depth, plane, score = undistorted_dataset.load_clean_depthmap(image_name)
    elif depth_map_type == "pruned":
        # TODO: Investigate the outputs of the pruned depth map function
        raise NotImplementedError("Currently unable to implement method for the pruned points")
    else:
        raise NotImplementedError(f"Currently no depth map method named {depth_map_type}")
    # Generate image coordinates starting in the upper left corner
    image_coords = generate_image_coordinates(depth.shape)
    # Invert the row coordinates so the vertical axis increases upwards
    image_coords[:, 0] = (depth.shape[0] - 1) - image_coords[:, 0]
    # Normalise the image coordinates https://opensfm.org/docs/geometry.html
    # NOTE(review): OpenSfM's normalized coordinates divide by the largest
    # image dimension; this divides each axis by its own size — confirm the
    # per-axis normalisation is intended.
    normalised_image_coords = image_coords / depth.shape - 0.5
    # Similar triangles: the offset from the optical axis scales with
    # depth / focal_ratio for each normalised pixel coordinate.
    zx_layer = depth.flatten().reshape(-1, 1) * normalised_image_coords / focal_ratio
    # Assemble the requested bands: X, Y (depth), Z, then optional extras.
    x_band = zx_layer[:, 1].reshape(depth.shape)
    z_band = zx_layer[:, 0].reshape(depth.shape)
    layers = [x_band, depth, z_band]
    if add_normals:
        layers.append(plane)
    if add_scores:
        layers.append(score)
    return np.dstack(layers)
# Open the capture session and its undistorted companion dataset.
# NOTE(review): `capture_session_path` must be defined earlier in the script.
dataset = DataSet(capture_session_path)
udata_path = os.path.join(dataset.data_path, "undistorted")
udata = UndistortedDataSet(dataset, udata_path, io_handler=dataset.io_handler)
reconstructions = udata.load_undistorted_reconstruction()
# Collect every shot name across all reconstructions.
images = [
    shot_name
    for reconstruction in reconstructions
    for shot_name in reconstruction.shots.keys()
]
for image in images:
    # Skip shots for which no raw depth map was computed.
    if not udata.raw_depthmap_exists(image):
        continue
    undistorted_image = udata.load_undistorted_image(image)
    depth_map = convert_depth_map_to_xyz_array(udata, image)
    # Show the RGB image next to the three XYZ bands.
    fig, axs = plt.subplots(1, 4, figsize=(20, 10))
    axs[0].imshow(undistorted_image)
    axs[0].set_title("RGB image")
    for band, title in enumerate(
        ("X dimensions", "Y dimensions", "Z dimensions"), start=1
    ):
        axs[band].imshow(depth_map[:, :, band - 1])
        axs[band].set_title(title)
    plt.show()
Is this an acceptable method or is there already code in the library that can produce this result?
Option 2: Pruned depth map
It seems that instead of getting depth, plane, score, nghbr, nghbrs from the raw depth map and clean_depth, clean_plane, clean_score from the clean depth map we get points, normals, colors, labels from the pruned depth map. However, it seems that these points can't be reshaped into the depth map image dimensions? Am I misunderstanding the use behind the pruned depth map, does each point not directly relate to a pixel in the depth map image space?