ManiSkill icon indicating copy to clipboard operation
ManiSkill copied to clipboard

`BaseEnv.render()` Returns the Last Frame of the Previous Episode After `BaseEnv.reset()`

Open hesic73 opened this issue 4 months ago • 3 comments

In my training script, I have two environments:

  • One environment for collecting online data.
  • An evaluation environment (eval_env) with num_envs > 1, which I use to periodically evaluate the policy.

However, from the second evaluation onwards, the first frame in the recorded video is always the last frame from the previous episode. This happens even though I explicitly call eval_env.reset() before starting the evaluation.

While I consistently observe this issue in my training script, I cannot reproduce it with the minimal script below. Additionally, if I set num_envs=1 in my training script (while still running it on the GPU), the issue does not occur.

Do you have any insights into what might be causing this issue, especially given that it only occurs with num_envs > 1?

import numpy as np
from typing import List, Tuple, Optional, Callable, Dict
import imageio.v3 as iio
from gymnasium import Wrapper, Env
import gymnasium as gym


from mani_skill.envs import BaseEnv
from mani_skill.utils.structs import Actor, Pose
from mani_skill.utils import sapien_utils
from mani_skill.sensors.camera import CameraConfig

import torch

from typing import Any, Optional, Tuple, Sequence, Dict, List, Union, Callable

from mani_skill.utils.registration import register_env
import time


@register_env("MyEnv-v0", max_episode_steps=50)
class MyEnv(BaseEnv):
    """Minimal reproduction environment: a single red cube plus the agent's robot.

    Registered as ``"MyEnv-v0"`` with ``max_episode_steps=50``. Every episode
    places the cube at (0, 0, 0.5) and moves the robot to (-100, -100, 0).
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``BaseEnv`` after setting a local flag."""
        # Assigned before the base-class constructor runs, as it may already
        # invoke hooks defined on this class (NOTE(review): confirm).
        self._first_initialized = False
        super().__init__(*args, **kwargs)

    @property
    def _default_human_render_camera_configs(self):
        """Return the one camera config (``"render_camera"``) used for rendering.

        The camera sits at (0.5, 0.5, 1.2) and looks at (0, 0, 0.5). The
        remaining positional values are presumably width, height, fov, near
        and far -- confirm against ``CameraConfig``.
        """
        eye = [0.5, 0.5, 1.2]
        target = [0.0, 0.0, 0.5]
        camera_pose = sapien_utils.look_at(eye, target)
        return [CameraConfig("render_camera", camera_pose, 640, 480, 1, 0.01, 100)]

    def _load_scene(self, options: dict):
        """Build the cube actor and store it on ``self.cube``.

        Args:
            options: Unused here.
        """
        cube_half_size = 0.05
        cube_builder = self.scene.create_actor_builder()
        # Collision and visual shapes share the same half extents; each call
        # receives its own list.
        cube_builder.add_box_collision(half_size=[cube_half_size] * 3)
        cube_builder.add_box_visual(
            half_size=[cube_half_size] * 3,
            material=[1, 0, 0],
        )
        self.cube = cube_builder.build(name='cube')

    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
        """Reset the cube and robot poses for a new episode.

        Args:
            env_idx: Unused here; the same pose is set regardless of its value.
            options: Unused here.
        """
        cube_position = torch.tensor([0, 0, 0.5], device=self.device)
        self.cube.set_pose(Pose.create_from_pq(p=cube_position))

        # The robot is moved far from the origin, presumably to keep it out of
        # the camera's view.
        robot_position = torch.tensor([-100, -100, 0.0], device=self.device)
        self.agent.robot.set_pose(Pose.create_from_pq(p=robot_position))

    def evaluate(self) -> dict:
        """Return ``{"fail": tensor}`` with an all-False bool entry per env."""
        return {
            "fail": torch.zeros(
                self.num_envs, dtype=torch.bool, device=self.device
            ),
        }


class _PreStepOrAnyDoneWrapper(Wrapper):
    """Invoke a callback before every step and again when an episode ends.

    The callback receives the wrapped environment. After each step it is
    called a second time if any watched sub-environment reports
    ``terminated | truncated``.
    """

    def __init__(self, env, func: Callable[[Env,], None], done_indices: Optional[List[int]] = None):
        """Store the callback and the optional subset of indices to watch.

        Args:
            env: Environment to wrap.
            func: Callback invoked with the wrapped environment.
            done_indices: Indices into the done flags to watch; ``None``
                watches all of them.
        """
        super().__init__(env)
        self._func = func
        self._done_indices = done_indices

    def step(self, action):
        """Step the wrapped env, firing the callback before and on done.

        Returns:
            The ``(obs, reward, terminated, truncated, info)`` values produced
            by the wrapped environment, unchanged.
        """
        # Pre-step hook.
        self._func(self.env)
        obs, reward, terminated, truncated, info = self.env.step(action)

        finished = terminated | truncated
        watched = finished if self._done_indices is None else finished[self._done_indices]
        if watched.any():
            # Post-done hook.
            self._func(self.env)
        return obs, reward, terminated, truncated, info


class VideoRecorder:
    """Accumulate frames in memory and write them out as a video file."""

    def __init__(self):
        """Start with an empty frame buffer."""
        self._frames: List[np.ndarray] = []

    def record_frame(self, frame: np.ndarray):
        """Append one frame to the buffer."""
        self._frames.append(frame)

    def reset(self):
        """Discard all buffered frames by rebinding to a fresh list."""
        self._frames = []

    def save(self, path: str):
        """Write the buffered frames to ``path`` at 30 fps using libx264.

        Raises:
            ValueError: If no frames have been recorded.
        """
        frames = self._frames
        if frames:
            iio.imwrite(path, frames, fps=30, codec='libx264')
            return
        raise ValueError("No frames to save")


def rollout_until_first_done(env: BaseEnv):
    """Reset ``env`` and take random actions until index 0 reports done.

    "Done" means ``terminated | truncated`` is truthy at index 0. Nothing is
    returned; the function is used for its side effects on ``env``.
    """
    _obs, _info = env.reset()
    first_env_done = False
    while not first_env_done:
        sampled_action = env.action_space.sample()
        _obs, _reward, terminated, truncated, _info = env.step(action=sampled_action)
        first_env_done = (terminated | truncated)[0].item()


def main():
    """Run two rollouts and save a video of the second one to ``demo.mp4``.

    The first rollout only serves to finish one episode. Its frames are
    discarded so that the saved video shows what ``render()`` returns at the
    start of the next episode.
    """
    env_kwargs = dict(
        robot_uids='panda',
        render_mode='rgb_array',
        sim_backend='gpu',
        reward_mode='none',
        num_envs=128,
    )
    raw_env = gym.make(id="MyEnv-v0", **env_kwargs)

    recorder = VideoRecorder()

    def capture(env: BaseEnv):
        """Render ``env`` and record the frame (first one if batched)."""
        image: torch.Tensor = env.render()
        rank = len(image.shape)
        if rank > 3:
            # A batched render: keep only the first entry along the leading axis.
            assert rank == 4
            image = image[0]
        assert len(image.shape) == 3  # (H, W, C)
        recorder.record_frame(image.cpu().numpy())

    # Only index 0 triggers the extra post-done capture.
    env = _PreStepOrAnyDoneWrapper(raw_env, capture, done_indices=[0])

    # First episode: its frames are thrown away below.
    rollout_until_first_done(env)

    # Second episode: this is the one written to disk.
    recorder.reset()
    rollout_until_first_done(env)
    recorder.save('demo.mp4')


# Run the reproduction only when this file is executed as a script.
if __name__ == '__main__':
    main()

hesic73 avatar Oct 11 '24 01:10 hesic73