DROID-SLAM
Visualizing saved reconstruction.
Hi. Thank you for the amazing work.
I am running the code on a remote server in --disable_vis mode. I have the saved reconstruction results in .npy format, but I am not sure how to reconstruct the scene in Open3D from these .npy files.
Before that, I was using visualization mode and directly saving .ply files, and I was able to visualize them in Open3D. But I don't want to do that anymore and want to use the default .npy files instead.
Thank you~
Can we please get some guidance on this?
I do not know how familiar you are with Open3D, but you can use RGBD integration to construct .ply files from the data saved in the .npy files.
You can check this for more info.
@bkhanal-11 Hi. I wrote the following code.
import glob
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt


class CameraPose:
    def __init__(self, meta, mat):
        self.metadata = meta
        self.pose = mat

    def __str__(self):
        return (
            "Metadata : "
            + " ".join(map(str, self.metadata))
            + "\n"
            + "Pose : "
            + "\n"
            + np.array_str(self.pose)
        )


def read_trajectory(filename):
    traj = []
    with open(filename, "r", encoding="utf-8") as f:
        metastr = f.readline()
        while metastr:
            metadata = list(map(int, metastr.split()))
            mat = np.zeros(shape=(4, 4))
            for i in range(4):
                matstr = f.readline()
                mat[i, :] = np.fromstring(matstr, dtype=float, sep=" \t")
            traj.append(CameraPose(metadata, mat))
            metastr = f.readline()
    return traj


volume = o3d.pipelines.integration.ScalableTSDFVolume(
    voxel_length=4.0 / 512.0,
    sdf_trunc=0.04,
    color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8,
)

intrinsics = np.load("./results/intrinsics.npy")
fx = intrinsics[0][0]
fy = intrinsics[0][1]
cx = intrinsics[0][2]
cy = intrinsics[0][3]

color_images = np.load("./results/images.npy")
depth_images = np.load("./results/disps.npy")
camera_poses = read_trajectory("./results/log.txt")

camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
    width=576, height=336, fx=fx, fy=fy, cx=cx, cy=cy
)

for i in range(len(color_images)):
    print(f"Integrate {i}-th image into the volume.")
    color = color_images[i]
    color = np.ascontiguousarray(color.transpose(1, 2, 0))
    color = o3d.geometry.Image((color).astype(np.uint8))
    depth = depth_images[i]
    depth = np.ascontiguousarray(depth.transpose(0, 1))
    depth = o3d.geometry.Image((depth * 255))
    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color, depth, depth_trunc=4.0, convert_rgb_to_intensity=False
    )
    volume.integrate(rgbd, camera_intrinsic, np.linalg.inv(camera_poses[i].pose))

print("Extract a triangle mesh from the volume and visualize it.")
mesh = volume.extract_triangle_mesh()
mesh.compute_vertex_normals()
o3d.visualization.draw_geometries([mesh])
However, the resulting map is not correct. I am missing something but not sure what it is. Here is the result.

@bkhanal-11 This is what my log.txt looks like.
0 0 1
-1.0 0.0 0.0 0.0
0.0 -1.0 0.0 0.0
0.0 0.0 1.0 0.0
0.0 0.0 0.0 1.0
1 1 2
-0.9988476022589705 -0.02794569745350285 0.039019295936009724 -0.017698440700769424
0.02813569752382185 -0.9995947404348644 0.004328674139191881 -0.010660745203495026
0.03888251517523815 0.005421520892940287 0.9992290814046871 -0.010660745203495026
0.0 0.0 0.0 1.0
2 2 3
-0.9921915766540266 -0.07212852491360724 0.10175141822394249 -0.15529966354370117
0.0741547246722205 -0.9971140443786379 0.016268353071603008 -0.02430793270468712
0.10028435583670908 0.02368667128708258 0.9946768267018962 -0.02430793270468712
0.0 0.0 0.0 1.0
3 3 4
-0.9858344461233062 -0.09958461872798406 0.1349568395804975 -0.229265496134758
0.10741816380243378 -0.9928507594990048 0.05204524423578825 -0.11546353995800018
0.1288090948732565 0.06580481042485715 0.9894836754605307 -0.11546353995800018
0.0 0.0 0.0 1.0
4 4 5
-0.9800208043395229 -0.11316477936030064 0.16356330815329656 -0.30861613154411316
0.12554535795960764 -0.9897944798567042 0.06741847476761405 -0.17945930361747742
0.1542646626953489 0.08660612194026414 0.9842264950131948 -0.17945930361747742
0.0 0.0 0.0 1.0
@pytholic Hi, I also tried to write a script for visualization and ended up with a similar result to yours. It may be due to either the intrinsics or the disparity (depth). I am also stuck on the same problem now.
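One thing worth double-checking in the script above: DROID-SLAM's disps.npy stores inverse depth (and, if I remember the demo correctly, at 1/8 of the input resolution, with intrinsics.npy already scaled to that grid), so the disparity has to be inverted rather than multiplied by 255 before handing it to Open3D. A rough per-frame sketch of that conversion, treating those resolution assumptions as something to verify on your own data:

import numpy as np
import open3d as o3d
import cv2

disps = np.load("./results/disps.npy")           # (N, h, w) inverse depth
images = np.load("./results/images.npy")         # (N, 3, H, W) uint8, BGR
intrinsics = np.load("./results/intrinsics.npy")

i = 0
h, w = disps[i].shape

# invert the disparity to get depth; guard against zero disparities
depth = np.zeros_like(disps[i], dtype=np.float32)
np.divide(1.0, disps[i], out=depth, where=disps[i] > 0)

# bring the color image down to the depth resolution, BGR -> RGB, HWC layout
color = images[i][[2, 1, 0]].transpose(1, 2, 0)
color = cv2.resize(color, (w, h), interpolation=cv2.INTER_LINEAR)

rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
    o3d.geometry.Image(np.ascontiguousarray(color)),
    o3d.geometry.Image(depth),
    depth_scale=1.0,       # depth is already in SLAM units, not millimeters
    depth_trunc=8.0,
    convert_rgb_to_intensity=False,
)

# intrinsics are saved as [fx, fy, cx, cy] for the (downsampled) grid
fx, fy, cx, cy = intrinsics[0]
cam = o3d.camera.PinholeCameraIntrinsic(width=w, height=h, fx=fx, fy=fy, cx=cx, cy=cy)

The rgbd/cam pair can then be passed to volume.integrate() together with the matching camera-to-world pose, as in the original script.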
@bkhanal-11 @pytholic Hi. Sorry, just wanted to ask if you ever figured out why this was happening? I am also running this on a remote server and have to use --disable_vis, but I'm also getting a mess after trying the reconstruction.
@kelvinyankey6 My workaround was to save points and poses in .ply file on the fly.
https://github.com/pytholic/ml_cv_scripts_guides/blob/main/droid_slam/vis_headless.py
Ah @pytholic I did the same to save the points. My environment doesn't have access to an x server. But I didn't know I could do the same for the camera poses. This is really helpful. Thank you very much for getting back to me
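For anyone else who wants to dump the poses as well: one simple option is to write every pose as a small coordinate-frame mesh and merge them into a single .ply. A minimal sketch, assuming poses is a list or array of 4x4 camera-to-world matrices (the helper name is just illustrative):

import open3d as o3d

def save_camera_poses_ply(poses, path="cameras.ply", size=0.1):
    # one coordinate frame per camera, merged into a single mesh
    merged = o3d.geometry.TriangleMesh()
    for T in poses:
        frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=size)
        frame.transform(T)
        merged += frame
    o3d.io.write_triangle_mesh(path, merged)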
@kelvinyankey6 My workaround was to save points and poses in a .ply file on the fly: https://github.com/pytholic/ml_cv_scripts_guides/blob/main/droid_slam/vis_headless.py
Hello, thanks for your code! I can't save the .ply that way: because I am running this on a remote server, it never enters the droid_visualization function.
I had a similar problem when I used Open3D to visualize the saved reconstruction of the sfm_bench dataset.
My approach is to use the pose, intrinsics and depth to directly recover the 3D coordinates of the points.
import cv2
import numpy as np
import open3d as o3d


def visual(disp, K, T, image):
    depth = 1.0 / disp.squeeze()  # h, w
    height, width = depth.shape
    img = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), dsize=(width, height), interpolation=cv2.INTER_LINEAR)

    fx, fy, cx, cy = K
    intrinsics_matrix = np.array([[fx, 0, cx],
                                  [0, fy, cy],
                                  [0, 0, 1]])
    intrinsics_matrix_homo = np.zeros((3, 4), dtype=np.float32)
    intrinsics_matrix_homo[:3, :3] = intrinsics_matrix
    intrinsics_matrix_homo[2, 3] = 1

    pose = pose_quaternion_to_matrix(T)  # 4x4 camera-to-world matrix

    points = []
    rgbs = []
    for v in range(height):
        for u in range(width):
            d = depth[v, u]
            if d == 0:
                continue
            # back-project pixel (u, v) with depth d into the camera frame
            x_cam = (u - cx) / fx * d
            y_cam = (v - cy) / fy * d
            z_cam = d
            points.append([x_cam, y_cam, z_cam])
            rgbs.append(img[v, u, :])

    rgbs = np.array(rgbs)
    points = np.array(points)
    ones = np.ones((points.shape[0], 1))
    points = np.hstack((points, ones))  # homogeneous coordinates
    pc_world = (pose @ points.T).T      # transform into the world frame
    return points, pc_world, intrinsics_matrix_homo, pose, rgbs


points, pc_world, intrinsics_matrix_homo, pose, rgbs = visual(disp, K, T, image)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pc_world[:, :3])  # drop the homogeneous coordinate
pcd.colors = o3d.utility.Vector3dVector(rgbs / 255.0)
o3d.visualization.draw_geometries([pcd])
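The helper pose_quaternion_to_matrix is not shown above; here is one possible version, assuming T is a 7-vector [tx, ty, tz, qx, qy, qz, qw] that already describes a camera-to-world transform (if it is world-to-camera, as the raw DROID-SLAM poses are, invert the result before using it):

import numpy as np
from scipy.spatial.transform import Rotation

def pose_quaternion_to_matrix(T):
    # [tx, ty, tz, qx, qy, qz, qw] -> 4x4 homogeneous matrix
    M = np.eye(4)
    M[:3, :3] = Rotation.from_quat(np.asarray(T[3:7])).as_matrix()  # scipy expects (x, y, z, w)
    M[:3, 3] = np.asarray(T[:3])
    return M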
Use open3d headless rendering and it works.
@pytholic forgot to respond to this. But I used your method. It worked. Thanks
@kelvinyankey6 cool! @NOTGOOOOD please try it with headless rendering, will work hopefully!
Hi, thanks for sharing your vis_headless.py code. I can do visualization on my device, so I would like to ask how to use your code: shall I download the ml_cv_scripts_guides/droid_slam/ folder, place the files under the original DROID-SLAM project, and then just run python demo.py --imagedir=data/exampledata --calib=calib/exampledata.txt --reconstruction_path=exampledata?
Or is there any other special step to use your vis_headless.py to get ply?
Thank you so much for sharing your code, I was able to get the point cloud ply file I want.
For those who may have the same question as me: I used the provided demo.py to replace the original DROID-SLAM demo.py and vis_headless.py to replace the original visualization.py, and simply changed line 38 in droid.py to set the save path:
self.visualizer = Process(target=droid_visualization, args=(self.video,"saveplypath"))
Hi, thanks for your work and explanations. I'm running the code in a Docker container and the headless script is not working there; I get the error:

Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/DROID-SLAM/vis_headless.py", line 175, in droid_visualization
    vis.get_render_option().load_from_json("misc/renderoption.json")
AttributeError: 'NoneType' object has no attribute 'load_from_json'

So it seems it cannot create the Open3D visualizer. Is the code supposed to be able to run in Docker, or am I missing something?
Thanks
@Zunhammer Not sure. I think I have used it with Docker in the past, but that was my own custom image.
Here is my Docker setup for DROID-SLAM: https://github.com/princeton-vl/DROID-SLAM/pull/98
I created a PR but no one is maintaining the repo. You can also check this: http://www.open3d.org/docs/0.16.0/docker.html
@pytholic @Zunhammer Hi, I am also using the Docker image provided by @pytholic and trying to generate .ply files from the container. However, I am having the same problem as @Zunhammer. Is there any way to solve this problem?
First, thanks to @pytholic for the response and the hints. I tried with my own Docker image but had no luck, and I didn't have time to test the one from pytholic yet. I'll probably do this after Christmas. So in short, I don't have a solution yet.
@Zunhammer I solved this issue already, so maybe it will be helpful to you too. @pytholic's Dockerfile works great, but to use it in a headless environment and still be able to save .ply files, you also need the headless build of Open3D. After building the Docker image with @pytholic's Dockerfile, remove Open3D from the container, recompile Open3D with headless rendering, and install it as described here: http://www.open3d.org/docs/latest/tutorial/Advanced/headless_rendering.html. Then use the method from https://github.com/princeton-vl/DROID-SLAM/issues/75#issuecomment-1788157019 and it works out well.
As mentioned by @HtutLynn, if you are using my headless script, you need to install the headless build of Open3D in your container. I used headless rendering because I was working with a remote machine. Also, I did not add headless Open3D to the Dockerfile to keep it more general.
If you want to use it without headless rendering, then you might have to pick some parts from my script (the parts that save .ply files) and add them to the original script. Also, in that case, you should use normal Open3D, and you might also have to use X11 forwarding for the container.
@pytholic I am trying to use your visualization code, but demo.py does not generate any log.txt with the camera trajectory for the poses. Do you know how to get the log.txt?
@jmunshi1990 Hi. It has been a long time since I used this. I don't remember how I was saving the final point cloud, but maybe you can take a look at this part of the official demo for the camera poses.
https://github.com/pytholic/DROID-SLAM/blob/f68a25fa230730e3eaaaa6ab03a818ca5da6954f/demo.py#L59
@pytholic I am using the highlighted part of the demo script to generate the reconstructions, but the problem is that I am not able to generate the log.txt file with the trajectory needed for the code you posted earlier. I am trying to produce the read_trajectory input file from the saved files, but I am not getting it. Any suggestion on how to do that?
@jmunshi1990 Did you ever figure this out? I'm trying to recreate @pytholic's script to visualize the output and I don't see any log.txt created anywhere.
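As far as I can tell, the demo never writes a log.txt itself; the script above expects a Redwood-style trajectory file (one metadata line followed by a 4x4 camera-to-world matrix per frame). One way to produce it from the saved poses.npy, assuming the poses are stored as world-to-camera [tx, ty, tz, qx, qy, qz, qw] the way DROID-SLAM keeps them internally (drop the inversion if yours are already camera-to-world):

import numpy as np
from scipy.spatial.transform import Rotation

def write_log_trajectory(poses_npy="./results/poses.npy", out="./results/log.txt"):
    poses = np.load(poses_npy)                      # (N, 7): tx ty tz qx qy qz qw
    with open(out, "w", encoding="utf-8") as f:
        for i, p in enumerate(poses):
            T = np.eye(4)                           # world-to-camera
            T[:3, :3] = Rotation.from_quat(p[3:7]).as_matrix()
            T[:3, 3] = p[:3]
            T = np.linalg.inv(T)                    # camera-to-world, what read_trajectory expects
            f.write(f"{i} {i} {i + 1}\n")
            for row in T:
                f.write(" ".join(f"{v:.10f}" for v in row) + "\n")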
@pytholic @jmunshi1990
I was facing a lot of issues with OpenGL on my GPU server, and with limited permissions there was no scope to fix them. The visualization process would just fail for me at vis.create_window().
To circumvent this issue, I created a script which creates the necessary point cloud without opening a window:
def save_reconstruction(video, save_path, device="cuda:0"):
    """ Saves DROID-SLAM reconstruction results to .PLY files without visualization """
    torch.cuda.set_device(device)

    with torch.no_grad():
        with video.get_lock():
            dirty_index, = torch.where(video.dirty.clone())

        if len(dirty_index) == 0:
            print("No new frames to process.")
            return

        video.dirty[dirty_index] = False

        # Convert poses to 4x4 matrices
        poses = torch.index_select(video.poses, 0, dirty_index)
        disps = torch.index_select(video.disps, 0, dirty_index)
        Ps = SE3(poses).inv().matrix().cpu().numpy()

        images = torch.index_select(video.images, 0, dirty_index)
        images = images.cpu()[:, [2, 1, 0], 3::8, 3::8].permute(0, 2, 3, 1) / 255.0
        points = droid_backends.iproj(SE3(poses).inv().data, disps, video.intrinsics[0]).cpu()

        # Depth filtering
        thresh = 0.3 * torch.ones_like(disps.mean(dim=[1, 2]))
        count = droid_backends.depth_filter(video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)
        count = count.cpu()
        disps = disps.cpu()
        masks = ((count >= 2) & (disps > 0.5 * disps.mean(dim=[1, 2], keepdim=True)))

        all_pts = []
        all_clr = []
        for i in range(len(dirty_index)):
            mask = masks[i].reshape(-1)
            pts = points[i].reshape(-1, 3)[mask].cpu().numpy()
            clr = images[i].reshape(-1, 3)[mask].cpu().numpy()
            all_pts.append(pts)
            all_clr.append(clr)

        all_pts = np.concatenate(all_pts, axis=0)
        all_clr = np.concatenate(all_clr, axis=0)

        # Save points to .ply
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(all_pts)
        pcd.colors = o3d.utility.Vector3dVector(all_clr)
        o3d.io.write_point_cloud(f"points.ply", pcd, write_ascii=False)
        print(f"Saved {len(all_pts)} points to points.ply")

        # Save camera poses to .ply
        cameras = []
        for pose in Ps:
            cam_actor = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
            cam_actor.transform(pose)
            cameras.append(cam_actor)

        combined_camera_mesh = sum(cameras, o3d.geometry.TriangleMesh())
        o3d.io.write_triangle_mesh(f"cameras.ply", combined_camera_mesh, write_ascii=False)
        print(f"Saved {len(Ps)} camera poses to cameras.ply")
You will need to call this function here
Hello @aseemk98, so if I got you right, this line
self.visualizer = Process(target=droid_visualization, args=(self.video,))
should look like this using your function
self.visualizer = Process(target=save_reconstruction, args=(self.video, "path/where/to/save/reconstruction"))
?
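Not the author, but since save_reconstruction walks the whole video buffer once, it probably makes more sense to call it after tracking has finished rather than spawning it as the background visualizer process. A hedged sketch (names follow the stock demo.py and may need adapting; note that the function as posted hard-codes points.ply / cameras.ply, so its save_path argument is unused as written):

# at the end of demo.py, with visualization disabled
traj_est = droid.terminate(image_stream(args.imagedir, args.calib, args.stride))
save_reconstruction(droid.video, "reconstructions/")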
Here's the one I have, maybe this can be added. It assumes the reconstruction path is reconstructions and contains the following files:
├── reconstructions
│   ├── disps.npy
│   ├── images.npy
│   ├── intrinsics.npy
│   ├── poses.npy
│   └── tstamps.npy
import numpy as np
import matplotlib.pyplot as plt
import os


def load_reconstruction_data(base_path="reconstructions"):
    """Load all reconstruction data from the specified path"""
    data = {}
    try:
        data['disps'] = np.load(os.path.join(base_path, "disps.npy"))
        data['images'] = np.load(os.path.join(base_path, "images.npy"))
        data['intrinsics'] = np.load(os.path.join(base_path, "intrinsics.npy"))
        data['poses'] = np.load(os.path.join(base_path, "poses.npy"))
        data['tstamps'] = np.load(os.path.join(base_path, "tstamps.npy"))
        print(f"Loaded main reconstruction data from {base_path}")
    except FileNotFoundError as e:
        print(f"Could not load main data: {e}")

    checkpoint_path = os.path.join(base_path, "./reconstructions")
    if os.path.exists(checkpoint_path):
        try:
            data['checkpoint_disps'] = np.load(os.path.join(checkpoint_path, "disps.npy"))
            data['checkpoint_images'] = np.load(os.path.join(checkpoint_path, "images.npy"))
            data['checkpoint_poses'] = np.load(os.path.join(checkpoint_path, "poses.npy"))
            data['checkpoint_intrinsics'] = np.load(os.path.join(checkpoint_path, "intrinsics.npy"))
            data['checkpoint_tstamps'] = np.load(os.path.join(checkpoint_path, "tstamps.npy"))
            print(f"Loaded checkpoint data from {checkpoint_path}")
        except FileNotFoundError as e:
            print(f"Could not load checkpoint data: {e}")

    return data

def visualize_images(images, title="Images", max_display=9):
    """Visualize a subset of images in a grid"""
    if len(images.shape) == 4:  # (N, C, H, W) or (N, H, W, C)
        n_images = min(max_display, images.shape[0])
        cols = 3
        rows = (n_images + cols - 1) // cols

        fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows))
        if rows == 1:
            axes = axes.reshape(1, -1)

        for i in range(n_images):
            row, col = i // cols, i % cols
            img = images[i]
            if img.shape[0] == 3 or img.shape[0] == 1:  # Channel-first format
                img = np.transpose(img, (1, 2, 0))
            if img.max() > 1.0:
                img = img / 255.0
            if len(img.shape) == 3 and img.shape[2] == 1:
                img = img[:, :, 0]
                axes[row, col].imshow(img, cmap='gray')
            elif len(img.shape) == 3 and img.shape[2] == 3:
                axes[row, col].imshow(img)
            else:
                axes[row, col].imshow(img, cmap='gray')
            axes[row, col].set_title(f"Frame {i}")
            axes[row, col].axis('off')

        for i in range(n_images, rows * cols):
            row, col = i // cols, i % cols
            axes[row, col].axis('off')

        plt.suptitle(title)
        plt.tight_layout()

def visualize_depth_maps(disps, title="Depth Maps", max_display=9):
    """Visualize displacement/depth maps"""
    if len(disps.shape) >= 3:
        n_maps = min(max_display, disps.shape[0])
        cols = 3
        rows = (n_maps + cols - 1) // cols

        fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows))
        if rows == 1:
            axes = axes.reshape(1, -1)

        for i in range(n_maps):
            row, col = i // cols, i % cols
            depth_map = disps[i]
            if len(depth_map.shape) == 3:
                depth_map = depth_map[:, :, 0]
            im = axes[row, col].imshow(depth_map, cmap='plasma')
            axes[row, col].set_title(f"Depth Map {i}")
            axes[row, col].axis('off')
            plt.colorbar(im, ax=axes[row, col])

        for i in range(n_maps, rows * cols):
            row, col = i // cols, i % cols
            axes[row, col].axis('off')

        plt.suptitle(title)
        plt.tight_layout()

def visualize_poses_3d(poses, title="Camera Trajectory"):
    """Visualize camera poses in 3D"""
    if poses is None or len(poses) == 0:
        print("No pose data available for visualization")
        return

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection='3d')

    if len(poses.shape) == 2 and poses.shape[1] == 7:
        positions = poses[:, :3]  # First 3 elements are translation
        print("Interpreting poses as quaternion format: translation + quaternion")
    elif len(poses.shape) == 3 and poses.shape[1:] == (4, 4):
        positions = poses[:, :3, 3]
    elif len(poses.shape) == 2 and poses.shape[1] >= 3:
        positions = poses[:, :3]
    else:
        print(f"Unknown pose format: {poses.shape}")
        return

    ax.plot(positions[:, 0], positions[:, 1], positions[:, 2], 'b-', linewidth=2, label='Trajectory')
    ax.scatter(positions[0, 0], positions[0, 1], positions[0, 2], color='green', s=100, label='Start')
    ax.scatter(positions[-1, 0], positions[-1, 1], positions[-1, 2], color='red', s=100, label='End')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(title)
    ax.legend()

    # Make axes equal
    max_range = np.array([positions[:, 0].max() - positions[:, 0].min(),
                          positions[:, 1].max() - positions[:, 1].min(),
                          positions[:, 2].max() - positions[:, 2].min()]).max() / 2.0
    mid_x = (positions[:, 0].max() + positions[:, 0].min()) * 0.5
    mid_y = (positions[:, 1].max() + positions[:, 1].min()) * 0.5
    mid_z = (positions[:, 2].max() + positions[:, 2].min()) * 0.5
    ax.set_xlim(mid_x - max_range, mid_x + max_range)
    ax.set_ylim(mid_y - max_range, mid_y + max_range)
    ax.set_zlim(mid_z - max_range, mid_z + max_range)

def visualize_trajectory(tstamps, poses=None, intrinsics=None):
    """Visualize camera trajectory over time"""
    if tstamps is not None and len(tstamps) > 1:
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        axes[0, 0].plot(range(len(tstamps)), tstamps, 'b-o', markersize=3)
        axes[0, 0].set_xlabel('Frame Index')
        axes[0, 0].set_ylabel('Timestamp')
        axes[0, 0].set_title('Frame Timestamps')
        axes[0, 0].grid(True)

        if len(tstamps) > 1:
            intervals = np.diff(tstamps)
            axes[0, 1].plot(range(len(intervals)), intervals, 'r-o', markersize=3)
            axes[0, 1].set_xlabel('Frame Index')
            axes[0, 1].set_ylabel('Time Interval (s)')
            axes[0, 1].set_title('Frame Intervals')
            axes[0, 1].grid(True)

        if poses is not None:
            if len(poses.shape) == 2 and poses.shape[1] == 7:
                positions = poses[:, :3]
            elif len(poses.shape) == 3 and poses.shape[1:] == (4, 4):
                positions = poses[:, :3, 3]
            elif len(poses.shape) == 2 and poses.shape[1] >= 3:
                positions = poses[:, :3]
            else:
                positions = None

            if positions is not None:
                axes[1, 0].plot(positions[:, 0], positions[:, 1], 'g-', linewidth=2)
                axes[1, 0].scatter(positions[0, 0], positions[0, 1], color='green', s=50, label='Start')
                axes[1, 0].scatter(positions[-1, 0], positions[-1, 1], color='red', s=50, label='End')
                axes[1, 0].set_xlabel('X')
                axes[1, 0].set_ylabel('Y')
                axes[1, 0].set_title('Trajectory (Top View)')
                axes[1, 0].grid(True)
                axes[1, 0].legend()
                axes[1, 0].axis('equal')

                axes[1, 1].plot(range(len(positions)), positions[:, 2], 'purple', linewidth=2)
                axes[1, 1].set_xlabel('Frame Index')
                axes[1, 1].set_ylabel('Z (Height)')
                axes[1, 1].set_title('Altitude over Time')
                axes[1, 1].grid(True)
            else:
                axes[1, 0].text(0.5, 0.5, 'No pose data available', ha='center', va='center')
                axes[1, 1].text(0.5, 0.5, 'No pose data available', ha='center', va='center')

        plt.tight_layout()

def print_data_summary(data):
    """Print summary of loaded data"""
    print("\n" + "="*50)
    print("DROID-SLAM Reconstruction Data Summary")
    print("="*50)
    for key, value in data.items():
        if isinstance(value, np.ndarray):
            print(f"{key}:")
            print(f"  Shape: {value.shape}")
            print(f"  Dtype: {value.dtype}")
            print(f"  Min/Max: {value.min():.4f} / {value.max():.4f}")
            print()

def main():
    data = load_reconstruction_data()
    if not data:
        print("No data found to visualize!")
        return

    print_data_summary(data)

    if 'images' in data:
        visualize_images(data['images'], "Main Reconstruction - Images")
        plt.show()
    if 'disps' in data:
        visualize_depth_maps(data['disps'], "Main Reconstruction - Depth Maps")
        plt.show()
    if 'poses' in data:
        visualize_poses_3d(data['poses'], "Main Reconstruction - 3D Trajectory")
        plt.show()
    if 'tstamps' in data:
        visualize_trajectory(data['tstamps'], data.get('poses'), data.get('intrinsics'))
        plt.show()

    if 'checkpoint_images' in data:
        visualize_images(data['checkpoint_images'], "Checkpoint - Images")
        plt.show()
    if 'checkpoint_disps' in data:
        visualize_depth_maps(data['checkpoint_disps'], "Checkpoint - Depth Maps")
        plt.show()
    if 'checkpoint_poses' in data:
        visualize_poses_3d(data['checkpoint_poses'], "Checkpoint - 3D Trajectory")
        plt.show()

    if 'intrinsics' in data:
        print("Camera Intrinsics:")
        print(data['intrinsics'])
    if 'checkpoint_intrinsics' in data:
        print("Checkpoint Camera Intrinsics:")
        print(data['checkpoint_intrinsics'])


if __name__ == "__main__":
    main()
Let me know if this gives wrong results; there could be some fault in the visualize_poses_3d function.
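One thing to check on the 3D pose plot: if poses.npy holds the raw DROID-SLAM poses, they are world-to-camera quaternions [tx, ty, tz, qx, qy, qz, qw], so plotting poses[:, :3] directly shows the translation of that transform rather than the camera centers. A small hedged fix for that case:

import numpy as np
from scipy.spatial.transform import Rotation

def camera_centers(poses):
    # camera centers from world-to-camera [tx, ty, tz, qx, qy, qz, qw] poses
    centers = []
    for p in poses:
        R = Rotation.from_quat(p[3:7]).as_matrix()
        centers.append(-R.T @ p[:3])   # invert T_wc: C = -R^T t
    return np.array(centers)

These centers can then be passed to the plotting code in place of poses[:, :3].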