FoundationPose icon indicating copy to clipboard operation
FoundationPose copied to clipboard

Can I visualize the RGB images in run_linemod.py, just like in run_demo.py?

Open wsq1010 opened this issue 1 year ago • 2 comments

Thanks for your contribution. Can I see the RGB image with the pose outlines drawn on it by changing the debug parameter when running run_linemod.py?

wsq1010 avatar Apr 08 '24 03:04 wsq1010

My vis_refiner.png is blank

wsq1010 avatar Apr 08 '24 09:04 wsq1010

We haven't implemented this, but you can refer to run_demo.py. It shouldn't be hard to adapt here.

wenbowen123 avatar Apr 09 '24 00:04 wenbowen123

Thanks for your contribution. Can I see the RGB image with the pose outlines drawn on it by changing the debug parameter when running run_linemod.py?

This is how I visualize the LINEMOD results: modify the function run_pose_estimation_worker() in run_linemod.py as follows:

def run_pose_estimation_worker(reader, i_frames, est:FoundationPose=None, debug=0, ob_id=None, device='cuda:0'):
  """Estimate the pose of object `ob_id` in each requested frame and display it.

  For every frame index in `i_frames` the estimator `est` is asked to register
  the object, and the resulting pose is drawn on the color image (oriented 3D
  bounding box plus XYZ axes) and shown in an OpenCV window named '1'.

  Args:
    reader: dataset reader; this function uses its `K`, `id_strs`,
      `get_gt_mesh_file`, `get_video_id`, `get_color`, `get_depth` and
      `get_gt_pose` members.
    i_frames: iterable of frame indices to process.
    est: pose estimator; must not be None (it is dereferenced immediately).
    debug: when >= 3, the posed mesh is exported to
      `{est.debug_dir}/model_tf.obj` for every frame.
    ob_id: object id forwarded to the reader and the estimator.
    device: CUDA device string used for torch and the rasterizer context.

  Returns:
    A NestDict indexed as `result[video_id][id_str][ob_id]`, holding the
    estimated pose, or `np.eye(4)` for frames where no mask was found.

  Note:
    `detect_type` is not a parameter; it is read from the enclosing (module)
    scope, as in the original snippet.
  """
  torch.cuda.set_device(device)
  est.to_device(device)
  est.glctx = dr.RasterizeCudaContext(device=device)

  result = NestDict()

  # Load the ground-truth mesh once; it is only needed to size the box overlay.
  mesh_file = reader.get_gt_mesh_file(ob_id)
  mesh = trimesh.load(mesh_file)
  # Rescale vertices by 1/1000 (original note: "change mesh unit to meter";
  # presumably the file is in millimeters -- confirm for other datasets).
  mesh.vertices = mesh.vertices/1000
  to_origin, extents = trimesh.bounds.oriented_bounds(mesh)
  bbox = np.stack([-extents/2, extents/2], axis=0).reshape(2,3)
  # Loop-invariant: inverse of the mesh -> box-centered transform.
  origin_to_mesh = np.linalg.inv(to_origin)

  for i, i_frame in enumerate(i_frames):
    logging.info(f"{i}/{len(i_frames)}, i_frame:{i_frame}, ob_id:{ob_id}")
    video_id = reader.get_video_id()
    color = reader.get_color(i_frame)
    depth = reader.get_depth(i_frame)
    id_str = reader.id_strs[i_frame]

    ob_mask = get_mask(reader, i_frame, ob_id, detect_type=detect_type)
    if ob_mask is None:
      logging.info("ob_mask not found, skip")
      result[video_id][id_str][ob_id] = np.eye(4)
      # Skip only this frame. The original returned here, which silently
      # dropped every remaining frame whenever i_frames had more than one entry.
      continue

    est.gt_pose = reader.get_gt_pose(i_frame, ob_id)

    pose = est.register(K=reader.K, rgb=color, depth=depth, ob_mask=ob_mask, ob_id=ob_id)
    logging.info(f"pose:\n{pose}")

    if debug>=3:
      # Dump the estimator's mesh transformed by the estimated pose.
      posed_mesh = est.mesh_ori.copy()
      posed_mesh.apply_transform(pose)
      posed_mesh.export(f'{est.debug_dir}/model_tf.obj')

    # Visualization: express the pose in the box-centered frame so the
    # symmetric `bbox` extents line up with the object.
    center_pose = pose@origin_to_mesh
    vis = draw_posed_3d_box(reader.K, img=color, ob_in_cam=center_pose, bbox=bbox)
    # Draw the axes on top of the box image rather than on the raw frame, so the
    # box is kept even if draw_posed_3d_box does not draw in place.
    vis = draw_xyz_axis(vis, ob_in_cam=center_pose, scale=0.1, K=reader.K, thickness=2, transparency=0, is_input_rgb=True)
    # Reverse the channel order for display (the image is handled as RGB above).
    cv2.imshow('1', vis[...,::-1])
    cv2.waitKey(1)

    result[video_id][id_str][ob_id] = pose

  return result

huiwenzhang avatar Apr 23 '24 06:04 huiwenzhang