[Question] Env not reset with sb3 train.py on high decimation

Open shiven2001 opened this issue 8 months ago • 1 comments

So I am using sb3 to train my robot and when I run it, it does not reset after 5 seconds as it should.

First I used the following settings: # Post initialization def post_init(self) -> None: """Post initialization.""" # self.sim.device = "cuda:0" # general settings self.decimation = 2 self.episode_length_s = 5 # viewer settings self.viewer.eye = (8.0, 0.0, 5.0) # simulation settings self.sim.dt = 1 / 120

          self.sim.render_interval = self.decimation

and it worked without any issues but the robot arm control was quite jerky so I thought of updating the self.sim.dt to 1/500 and self.decimation = 1 so the actions are updated more often. The arm motion before very smooth after that.

But now the simualtion doesn't reset after 5 seconds. What is the issue here?

This is my cfg file:

  # import dependencies for isaaclab
  import os
  import sys
  import torch
  from isaaclab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg
  import isaaclab.sim as sim_utils
  from isaaclab.assets import ArticulationCfg, AssetBaseCfg
  from isaaclab.envs import mdp
  from isaaclab.scene import InteractiveScene, InteractiveSceneCfg
  from isaaclab.managers import (
      EventTermCfg as EventTerm,
      ObservationGroupCfg as ObsGroup,
      ObservationTermCfg as ObsTerm,
      RewardTermCfg as RewTerm,
      TerminationTermCfg as DoneTerm,
      ActionTerm as ActionTerm,
      ActionTermCfg as ActionTermCfg,
  )
  from isaaclab.utils import configclass
  from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
  from isaaclab.sensors import CameraCfg
  
  # Importing the robot arm cfg from Isaac Lab
  sys.path.append(os.path.join(os.environ["HOME"], "arm_isaaclab_niris/"))
  from franka_panda_art_cfg import (
      FRANKA_PANDA_HIGH_PD_CFG,
      # FRANKA_PANDA_CFG,
  )
  
  """
  from isaaclab_assets import (
      FRANKA_PANDA_HIGH_PD_CFG,
      # FRANKA_PANDA_CFG,
  )
  """
  
  # This configuration is useful for task-space control using differential IK. If using joint space, use default.
  robot_cfg = FRANKA_PANDA_HIGH_PD_CFG.copy()
  
  camera_cfg = CameraCfg(
      prim_path="{ENV_REGEX_NS}/Robot/panda_instanceable/panda_hand/front_camera",
      update_period=0.1,
      height=480,
      width=640,
      data_types=["rgb", "distance_to_image_plane"],
      spawn=None,
      # spawn=sim_utils.PinholeCameraCfg(
      #    focal_length=24.0,
      #    focus_distance=400.0,
      #    horizontal_aperture=20.955,
      #    clipping_range=(0.1, 1.0e5),
      # ),
  )
  
  from isaaclab.controllers import DifferentialIKControllerCfg
  
  # Create controller
  diff_ik_controller_cfg = mdp.DifferentialIKControllerCfg(
      command_type="pose",  # Control position and orientation
      use_relative_mode=False,  # Absolute poses for direct targeting
      ik_method="dls",  # Damped least squares for smoothness
      ik_params={"lambda_val": 0.1},  # Moderate damping
  )
  
  
  # Scene cfg
  @configclass
  class SceneCfg(InteractiveSceneCfg):
      """Configuration for a Wheeled robot scene."""
  
      # -- World
      # Distant light
      distant_light = AssetBaseCfg(
          prim_path="/World/DistantLight",
          spawn=sim_utils.DistantLightCfg(color=(0.9, 0.9, 0.9), intensity=2500.0),
          init_state=AssetBaseCfg.InitialStateCfg(rot=(0.738, 0.477, 0.477, 0.0)),
      )
      # Dorm light
      dome_light = AssetBaseCfg(
          prim_path="/World/Light",
          spawn=sim_utils.DomeLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75)),
      )
      # Ground plane
      ground = AssetBaseCfg(
          prim_path="/World/defaultGroundPlane",
          spawn=sim_utils.GroundPlaneCfg(),
          init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
      )
  
      # -- Env
      # Forklift object
      forklift = AssetBaseCfg(
          prim_path="{ENV_REGEX_NS}/Forklift",
          spawn=sim_utils.UsdFileCfg(
              usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Forklift/forklift.usd",
              scale=(1.0, 1.0, 1.0),
          ),
          init_state=AssetBaseCfg.InitialStateCfg(pos=(8.0, 0.0, 0.0)),
      )
  
      # Robot mount
      table = AssetBaseCfg(
          prim_path="{ENV_REGEX_NS}/Table",
          spawn=sim_utils.UsdFileCfg(
              usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/Stand/stand_instanceable.usd",
              scale=(2.0, 2.0, 2.0),
          ),
      )
      # Robot articulation
      robot: ArticulationCfg = robot_cfg.replace(prim_path="{ENV_REGEX_NS}/Robot")
  
      # Camera (attached to the end-effector)
      front_camera: CameraCfg = camera_cfg
  
  
  # Actions cfg
  @configclass
  class ActionsCfg:
      """Action specifications for the environment."""
  
      diff_ik_action_cfg = mdp.DifferentialInverseKinematicsActionCfg(
          asset_name="robot",  # Name of the Franka robot in the scene
          joint_names=["panda_joint.*"],  # Regex for Panda arm joints
          body_name="panda_hand",  # End-effector frame
          body_offset=None,  # No offset (target is directly at panda_hand)
          scale=1.0,  # No scaling of action
          controller=diff_ik_controller_cfg,  # Link to tuned controller
          debug_vis=True,  # Enable visualization for debugging
          clip=None,  # No clipping (rely on joint limits)
      )
  
      # Can use joint space as well
  
  
  sys.path.append(os.path.join(os.environ["HOME"], "arm_isaaclab_niris/"))
  import arm_mdp
  
  
  # Observations cfg
  @configclass
  class ObservationsCfg:
      """Observation specifications for the MDP."""
  
      @configclass
      class PolicyCfg(ObsGroup):
          """Observations for policy group."""
  
          # observation terms (order preserved)
          # The joint positions of the asset w.r.t. the default joint positions.
          # joint_positions_rel = ObsTerm(func=mdp.joint_pos_rel)
          # joint_positions_norm = ObsTerm(func=mdp.joint_pos_limit_normalized)
          joint_positions = ObsTerm(func=mdp.joint_pos)
          joint_velocities = ObsTerm(func=mdp.joint_vel)
          actions = ObsTerm(func=mdp.last_action)
          yolo_detection = ObsTerm(func=arm_mdp.yolo_detection_highest)
  
          def __post_init__(self) -> None:
              self.enable_corruption = False
              self.concatenate_terms = False
  
      # observation groups
      policy: PolicyCfg = PolicyCfg()
  
  
  # Event cfg
  @configclass
  class EventCfg:
      """Configuration for events with fixed orientation."""
  
      reset_scene = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
  
  
  @configclass
  class RewardsCfg:
      """Reward terms for the MDP."""
  
      # (1) Constant running reward
      # alive = RewTerm(func=mdp.is_alive, weight=1.0)
      # (2) Failure penalty
      # terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
      # (3) Confidence
      detection_reward = RewTerm(func=arm_mdp.yolo_detection_reward_highest, weight=1.0)
      # confidence_reward = RewTerm(func=arm_mdp.yolo_confidence_reward_highest, weight=100.0)
  
  
  # Terminations cfg
  @configclass
  class TerminationsCfg:
      """Termination terms for the MDP."""
  
      # (1) Time out
      time_out = DoneTerm(func=mdp.time_out, time_out=True)
      # (2) Confidence Threshold reached
      # confidence_threshold_terminate = DoneTerm(func=mdp.confidence_threshold_terminate_v2)
  
  
  @configclass
  class CommandsCfg:
      """Command terms for the MDP."""
  
      # no commands for this MDP
      null = mdp.NullCommandCfg()
  
  
  @configclass
  class CurriculumCfg:
      """Configuration for the curriculum."""
  
      pass
  
  
  @configclass
  class ArmEnvCfg(ManagerBasedRLEnvCfg):
      """Configuration for the wheeled environment."""
  
      sim: sim_utils.SimulationContext
      # Scene settings
      scene: SceneCfg = SceneCfg(num_envs=1, env_spacing=30.0)
      # Basic settings
      observations: ObservationsCfg = ObservationsCfg()
      actions: ActionsCfg = ActionsCfg()
      events: EventCfg = EventCfg()
      # MDP settings
      curriculum: CurriculumCfg = CurriculumCfg()
      rewards: RewardsCfg = RewardsCfg()
      terminations: TerminationsCfg = TerminationsCfg()
      # No command generator
      commands: CommandsCfg = CommandsCfg()
  
      # Post initialization
      def __post_init__(self) -> None:
          """Post initialization."""
          # self.sim.device = "cuda:0"
          # general settings
          self.decimation = 1
          self.episode_length_s = 5
          self.max_episode_length = 5
          # viewer settings
          self.viewer.eye = (8.0, 0.0, 5.0)
          # simulation settings
          # self.sim.dt = 1 / 120
          self.sim.dt = 1 / 500
  
          self.sim.render_interval = self.decimation
  
  
  import cv2  # Import OpenCV

Apr 25 '25 10:04 shiven2001

Thanks for posting this. There is a discussion about integrating sb3 with Isaac Lab in the issue #1769. Is this something that addresses your task as well?

Apr 26 '25 03:04 RandomOakForest