dm_control icon indicating copy to clipboard operation
dm_control copied to clipboard

How to create an environment to give depth images?

Open PeterMitrano opened this issue 4 years ago • 1 comments

The vision environments provide only RGB images rather than RGBD (depth) images, but I see a partial implementation of depth images. Here's my attempt to set depth=True, but it fails because the dtype is set to uint8, which cannot represent inf.

import collections

import numpy as np
from dm_control import manipulation
from dm_control.manipulation.bricks import _reassemble
from dm_control.manipulation.shared import registry, tags, observations
from dm_control.manipulation.shared.observations import ObservationSettings, _ENABLED_FEATURE, _ENABLED_FTT, \
    _DISABLED_FEATURE, CameraObservableSpec, ObservableSpec


class MyCameraObservableSpec(
        collections.namedtuple(
            'CameraObservableSpec',
            ('depth', 'height', 'width') + ObservableSpec._fields,
        )
):
    """Camera observable settings that carry an explicit `depth` flag.

    The fields are `depth`, `height` and `width`, followed by every field of
    `ObservableSpec`.
    """

    # Keep instances as plain tuples without a per-instance `__dict__`.
    __slots__ = ()


# Enabled 84x84 camera settings with the depth flag switched on; arguments are
# listed in the same order as the fields of `MyCameraObservableSpec`.
ENABLED_CAMERA_DEPTH = MyCameraObservableSpec(
    depth=True,
    height=84,
    width=84,
    enabled=True,
    update_interval=1,
    buffer_size=1,
    delay=0,
    aggregator=None,
    corruptor=None,
)

# Observation settings that combine the enabled proprio and FTT feature
# settings with the depth-enabled camera spec; prop_pose uses the disabled
# feature setting.
VISION_DEPTH = ObservationSettings(
    proprio=_ENABLED_FEATURE,
    ftt=_ENABLED_FTT,
    prop_pose=_DISABLED_FEATURE,
    camera=ENABLED_CAMERA_DEPTH)


@registry.add(tags.VISION)
def my_env():
    """Return the result of `_reassemble` for 5 bricks with `VISION_DEPTH`.

    Both the initial and the desired brick orders are randomized.
    """
    task_kwargs = dict(
        obs_settings=VISION_DEPTH,
        num_bricks=5,
        randomize_initial_order=True,
        randomize_desired_order=True,
    )
    return _reassemble(**task_kwargs)


# Load the environment named 'my_env' with a fixed seed.
env = manipulation.load('my_env', seed=0)

# The action spec supplies the bounds and shape used to sample actions below.
spec = env.action_spec()

# Step the environment 10 times with uniformly sampled random actions.
for i in range(10):
    a = np.random.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(a)

PeterMitrano avatar Oct 26 '21 23:10 PeterMitrano

This is actually a bug in the code. Turns out that when you call obs.configure(depth=True), it only sets the depth attribute and forgets to update the dtype attribute, which is only set in the constructor.

A hacky fix is to edit dm_control/dm_control/manipulation/shared/cameras.py:

def add_camera_observables(entity, obs_settings, *camera_specs):
  """Attaches cameras to an entity and builds depth observables for them.

  Every spec is added as a `camera` element under the worldbody of the entity's
  MJCF model and wrapped in an `observable.MJCFCamera`. The observable is first
  configured from `obs_settings.camera` and is then unconditionally switched to
  depth output.

  Args:
    entity: A `composer.Entity`.
    obs_settings: An `observations.ObservationSettings` instance.
    *camera_specs: Instances of `CameraSpec`.

  Returns:
    A `collections.OrderedDict` that maps each camera name to its configured
    `observable.MJCFCamera` instance.
  """
  observables = collections.OrderedDict()
  for camera_spec in camera_specs:
    camera_element = entity.mjcf_model.worldbody.add(
        'camera', **camera_spec._asdict())
    camera_obs = observable.MJCFCamera(camera_element)
    camera_obs.configure(**obs_settings.camera._asdict())
    # HACK: force depth rendering for every camera, regardless of the settings
    # applied above. The private dtype and channel-count attributes are
    # overwritten by hand so that they agree with depth output.
    camera_obs.configure(depth=True)
    camera_obs._dtype = np.float32  # Depth values are floats, not uint8.
    camera_obs._n_channels = 1  # A depth image has a single channel.
    observables[camera_spec.name] = camera_obs
  return observables

Now when I run explore with say stack_2_bricks_vision, I can plot timestep.observation["front_close"] and obtain the following (84, 84, 1) np.float32 array:

Screen Shot 2022-03-08 at 4 36 11 PM

kevinzakka avatar Mar 09 '22 00:03 kevinzakka