[Question] Env not reset with sb3 train.py on high decimation
So I am using sb3 to train my robot and when I run it, it does not reset after 5 seconds as it should.
First I used the following settings: # Post initialization def post_init(self) -> None: """Post initialization.""" # self.sim.device = "cuda:0" # general settings self.decimation = 2 self.episode_length_s = 5 # viewer settings self.viewer.eye = (8.0, 0.0, 5.0) # simulation settings self.sim.dt = 1 / 120
self.sim.render_interval = self.decimation
and it worked without any issues but the robot arm control was quite jerky so I thought of updating the self.sim.dt to 1/500 and self.decimation = 1 so the actions are updated more often. The arm motion before very smooth after that.
But now the simualtion doesn't reset after 5 seconds. What is the issue here?
This is my cfg file:
# import dependencies for isaaclab
import os
import sys
import torch
from isaaclab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import mdp
from isaaclab.scene import InteractiveScene, InteractiveSceneCfg
from isaaclab.managers import (
EventTermCfg as EventTerm,
ObservationGroupCfg as ObsGroup,
ObservationTermCfg as ObsTerm,
RewardTermCfg as RewTerm,
TerminationTermCfg as DoneTerm,
ActionTerm as ActionTerm,
ActionTermCfg as ActionTermCfg,
)
from isaaclab.utils import configclass
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
from isaaclab.sensors import CameraCfg
# Importing the robot arm cfg from Isaac Lab
sys.path.append(os.path.join(os.environ["HOME"], "arm_isaaclab_niris/"))
from franka_panda_art_cfg import (
FRANKA_PANDA_HIGH_PD_CFG,
# FRANKA_PANDA_CFG,
)
"""
from isaaclab_assets import (
FRANKA_PANDA_HIGH_PD_CFG,
# FRANKA_PANDA_CFG,
)
"""
# This configuration is useful for task-space control using differential IK. If using joint space, use default.
robot_cfg = FRANKA_PANDA_HIGH_PD_CFG.copy()
camera_cfg = CameraCfg(
prim_path="{ENV_REGEX_NS}/Robot/panda_instanceable/panda_hand/front_camera",
update_period=0.1,
height=480,
width=640,
data_types=["rgb", "distance_to_image_plane"],
spawn=None,
# spawn=sim_utils.PinholeCameraCfg(
# focal_length=24.0,
# focus_distance=400.0,
# horizontal_aperture=20.955,
# clipping_range=(0.1, 1.0e5),
# ),
)
from isaaclab.controllers import DifferentialIKControllerCfg
# Create controller
diff_ik_controller_cfg = mdp.DifferentialIKControllerCfg(
command_type="pose", # Control position and orientation
use_relative_mode=False, # Absolute poses for direct targeting
ik_method="dls", # Damped least squares for smoothness
ik_params={"lambda_val": 0.1}, # Moderate damping
)
# Scene cfg
@configclass
class SceneCfg(InteractiveSceneCfg):
"""Configuration for a Wheeled robot scene."""
# -- World
# Distant light
distant_light = AssetBaseCfg(
prim_path="/World/DistantLight",
spawn=sim_utils.DistantLightCfg(color=(0.9, 0.9, 0.9), intensity=2500.0),
init_state=AssetBaseCfg.InitialStateCfg(rot=(0.738, 0.477, 0.477, 0.0)),
)
# Dorm light
dome_light = AssetBaseCfg(
prim_path="/World/Light",
spawn=sim_utils.DomeLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75)),
)
# Ground plane
ground = AssetBaseCfg(
prim_path="/World/defaultGroundPlane",
spawn=sim_utils.GroundPlaneCfg(),
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
)
# -- Env
# Forklift object
forklift = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Forklift",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Forklift/forklift.usd",
scale=(1.0, 1.0, 1.0),
),
init_state=AssetBaseCfg.InitialStateCfg(pos=(8.0, 0.0, 0.0)),
)
# Robot mount
table = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Table",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/Stand/stand_instanceable.usd",
scale=(2.0, 2.0, 2.0),
),
)
# Robot articulation
robot: ArticulationCfg = robot_cfg.replace(prim_path="{ENV_REGEX_NS}/Robot")
# Camera (attached to the end-effector)
front_camera: CameraCfg = camera_cfg
# Actions cfg
@configclass
class ActionsCfg:
"""Action specifications for the environment."""
diff_ik_action_cfg = mdp.DifferentialInverseKinematicsActionCfg(
asset_name="robot", # Name of the Franka robot in the scene
joint_names=["panda_joint.*"], # Regex for Panda arm joints
body_name="panda_hand", # End-effector frame
body_offset=None, # No offset (target is directly at panda_hand)
scale=1.0, # No scaling of action
controller=diff_ik_controller_cfg, # Link to tuned controller
debug_vis=True, # Enable visualization for debugging
clip=None, # No clipping (rely on joint limits)
)
# Can use joint space as well
sys.path.append(os.path.join(os.environ["HOME"], "arm_isaaclab_niris/"))
import arm_mdp
# Observations cfg
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
# The joint positions of the asset w.r.t. the default joint positions.
# joint_positions_rel = ObsTerm(func=mdp.joint_pos_rel)
# joint_positions_norm = ObsTerm(func=mdp.joint_pos_limit_normalized)
joint_positions = ObsTerm(func=mdp.joint_pos)
joint_velocities = ObsTerm(func=mdp.joint_vel)
actions = ObsTerm(func=mdp.last_action)
yolo_detection = ObsTerm(func=arm_mdp.yolo_detection_highest)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = False
# observation groups
policy: PolicyCfg = PolicyCfg()
# Event cfg
@configclass
class EventCfg:
"""Configuration for events with fixed orientation."""
reset_scene = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Constant running reward
# alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty
# terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Confidence
detection_reward = RewTerm(func=arm_mdp.yolo_detection_reward_highest, weight=1.0)
# confidence_reward = RewTerm(func=arm_mdp.yolo_confidence_reward_highest, weight=100.0)
# Terminations cfg
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Confidence Threshold reached
# confidence_threshold_terminate = DoneTerm(func=mdp.confidence_threshold_terminate_v2)
@configclass
class CommandsCfg:
"""Command terms for the MDP."""
# no commands for this MDP
null = mdp.NullCommandCfg()
@configclass
class CurriculumCfg:
"""Configuration for the curriculum."""
pass
@configclass
class ArmEnvCfg(ManagerBasedRLEnvCfg):
"""Configuration for the wheeled environment."""
sim: sim_utils.SimulationContext
# Scene settings
scene: SceneCfg = SceneCfg(num_envs=1, env_spacing=30.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
events: EventCfg = EventCfg()
# MDP settings
curriculum: CurriculumCfg = CurriculumCfg()
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# No command generator
commands: CommandsCfg = CommandsCfg()
# Post initialization
def __post_init__(self) -> None:
"""Post initialization."""
# self.sim.device = "cuda:0"
# general settings
self.decimation = 1
self.episode_length_s = 5
self.max_episode_length = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
# self.sim.dt = 1 / 120
self.sim.dt = 1 / 500
self.sim.render_interval = self.decimation
import cv2 # Import OpenCV
Thanks for posting this. There is a discussion about integrating sb3 with Isaac Lab in the issue #1769. Is this something that addresses your task as well?