Genesis
Genesis copied to clipboard
[Bug]: NaN Values Occurring During Training Causing Training Interruption and Value Loss Explosion
trafficstars
Bug Description
During training with the Genesis Physics Simulator, the system consistently generates NaN (Not a Number) values which cause the training process to halt unexpectedly. This instability persists even after adjusting simulation parameters such as substeps and dt (delta time) values. The issue is accompanied by an explosion in value loss metrics. As a temporary workaround, I've implemented early termination when NaN values are detected, but the training still fails to progress properly due to the exploding value loss.
I think it is a similar to https://github.com/Genesis-Embodied-AI/Genesis/issues/653
Steps to Reproduce
import numpy as np
import torch
import wandb
from rsl_rl.runners import OnPolicyRunner
from utils import * # gs_rand_float
import genesis as gs
from genesis.engine.solvers.rigid.rigid_solver_decomp import RigidSolver
class HumanoidEnv:
def __init__(self, num_envs = 2048, headless = True, debug = False):
self.num_envs = num_envs
self.num_obs = 45 # position + velocity
self.num_privileged_obs = None
self.num_actions = 17 # -1 ~ 1 regularized
self.reward_scales = {
'linvel': 0.2,
'quadctrl': 0.01,
'alive': 0.5
}
self.headless = headless
self.debug = debug
self.dt = 0.01 # 100 Hz
self.n_frames = 4
sim_dt = self.dt
sim_substeps = self.n_frames
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.max_episode_length = 1000
self.reset_noise_scale = 0.01
self.healthy_z_range = torch.tensor((1.0, 2.0), device = self.device)
self.scene = gs.Scene(
sim_options = gs.options.SimOptions(
dt = sim_dt,
substeps = sim_substeps
),
viewer_options = gs.options.ViewerOptions(
max_FPS = int(1 / sim_dt),
camera_pos = (2.0, 0.0, 2.5),
camera_lookat = (0.0, 0.0, 0.5),
camera_fov = 40
),
vis_options = gs.options.VisOptions(
n_rendered_envs = 1
),
rigid_options = gs.options.RigidOptions(
constraint_solver = gs.constraint_solver.Newton,
integrator = gs.integrator.Euler,
iterations = 80,
ls_iterations = 40,
tolerance = 1e-8,
enable_collision = True,
enable_self_collision = True,
enable_joint_limit = True
),
show_viewer = not self.headless,
show_FPS = False
)
# Rigid Solver Settings
for solver in self.scene.sim.solvers:
if not isinstance(solver, RigidSolver):
continue
self.rigid_solver = solver
# Add Plane
self.scene.add_entity(
gs.morphs.URDF(file = 'urdf/plane/plane.urdf', fixed = True)
)
self.robot = self.scene.add_entity(
gs.morphs.MJCF(
file = 'humanoid_genesis.xml',
),
visualize_contact = self.debug,
vis_mode = 'collision'
)
self.scene.build(n_envs = self.num_envs)
# 1 body에 여러 개의 joint가 있을 경우 merged joint 처리
# https://github.com/Genesis-Embodied-AI/Genesis/issues/762
# https://github.com/Genesis-Embodied-AI/Genesis/pull/793
self.joint_names = [
'abdomen_z',
# 'abdomen_y',
'abdomen_x',
'right_hip_x',
# 'right_hip_y',
# 'right_hip_z',
'right_knee',
'left_hip_x',
# 'left_hip_y',
# 'left_hip_z',
'left_knee',
'right_shoulder1',
# 'right_shoulder2',
'right_elbow',
'left_shoulder1',
# 'left_shoulder2',
'left_elbow'
]
motor_dofs = [
self.robot.get_joint(name).dof_idx_local for name in self.joint_names
]
self.motor_dofs = []
for dof in motor_dofs:
if isinstance(dof, list):
self.motor_dofs.extend(dof)
else:
self.motor_dofs.append(dof)
self.gear = torch.tensor(
[100, 100, 100, 100, 100, 300, 200, 100, 100, 300, 200, 25, 25, 25, 25, 25, 25],
device = self.device,
dtype = gs.tc_float
)
self.force_ranges = torch.tensor(
[40, 40, 40, 40, 40, 120, 80, 40, 40, 120, 80, 10, 10, 10, 10, 10, 10],
device = self.device,
dtype = gs.tc_float
)
# Control Range
self.action_min = torch.tensor(-0.4, device = self.device)
self.action_max = torch.tensor(0.4, device = self.device)
# Set Force Range
# MuJoCo에서 force range를 별도로 설정하지 않으면 [0, 0]으로 덮어씌워짐.
# https://github.com/Genesis-Embodied-AI/Genesis/issues/619
self.robot.set_dofs_force_range(
dofs_idx_local = self.motor_dofs,
lower = -self.force_ranges,
upper = self.force_ranges)
self.init_qpos = torch.tensor(self.robot.init_qpos, device = self.device, dtype = gs.tc_float)
# Prepare reward functions
self.reward_functions, self.episode_sums = dict(), dict()
for name in self.reward_scales.keys():
# self.reward_scales[name] *= self.dt
self.reward_functions[name] = getattr(self, '_reward_' + name)
self.episode_sums[name] = torch.zeros((self.num_envs, ), device = self.device, dtype = gs.tc_float)
# Init Buffers
self.qpos = torch.zeros((self.num_envs, self.robot.n_qs), device = self.device, dtype = gs.tc_float)
self.qvel = torch.zeros((self.num_envs, self.robot.n_dofs), device = self.device, dtype = gs.tc_float)
self.base_pos = torch.zeros((self.num_envs, 3), device = self.device, dtype = gs.tc_float)
self.base_lin_vel = torch.zeros((self.num_envs, 3), device = self.device, dtype = gs.tc_float)
self.com_before = self.rigid_solver.get_links_COM([self.robot.base_link_idx]).squeeze(dim = 1)
self.observation_buf = torch.zeros((self.num_envs, self.num_obs), device = self.device, dtype = gs.tc_float)
self.reward_buf = torch.zeros((self.num_envs, ), device = self.device, dtype = gs.tc_float)
self.reset_buf = torch.ones((self.num_envs, ), device = self.device, dtype = gs.tc_int)
self.episode_length_buf = torch.zeros((self.num_envs,), device = self.device, dtype = gs.tc_int)
self.actions = torch.zeros((self.num_envs, self.num_actions), device = self.device, dtype = gs.tc_float)
self.extras = dict()
def reset_idx(self, envs_idx):
if len(envs_idx) == 0:
return
# reset dofs (including base)
self.qpos[envs_idx] = (
self.init_qpos
) + gs_rand_float(-self.reset_noise_scale, self.reset_noise_scale, (len(envs_idx), self.robot.n_qs), self.device)
self.qvel[envs_idx] = gs_rand_float(-self.reset_noise_scale, self.reset_noise_scale, (len(envs_idx), self.robot.n_dofs), self.device)
self.robot.set_qpos(
self.qpos[envs_idx],
envs_idx = envs_idx
)
self.robot.set_dofs_velocity(
self.qvel[envs_idx],
envs_idx = envs_idx
)
# reset buffers
self.base_pos[envs_idx] = self.robot.get_pos(envs_idx = envs_idx)
self.base_lin_vel[envs_idx] = self.robot.get_vel(envs_idx = envs_idx)
self.episode_length_buf[envs_idx] = 0
self.reset_buf[envs_idx] = True
# fill extras
self.extras['episode'] = {}
for key in self.episode_sums.keys():
self.extras['episode']['reward_' + key] = (
torch.mean(self.episode_sums[key][envs_idx]).item()
)
self.episode_sums[key][envs_idx] = 0.0
def reset(self):
self.reset_idx(torch.arange(self.num_envs, device = self.device))
return self.observation_buf, None
def step(self, actions):
# Scale action from [-1, 1] to actuator limits
self.actions = (actions + 1.0) * (self.action_max - self.action_min) * 0.5 + self.action_min
self.com_before = self.rigid_solver.get_links_COM([self.robot.base_link_idx]).squeeze(dim = 1)
# for _ in range(self.n_frames):
self.robot.control_dofs_force(self.actions * self.gear, self.motor_dofs)
self.scene.step()
qvel = self.robot.get_dofs_velocity()
qvel_mask = torch.isnan(qvel)
qvel_mask = torch.any(qvel_mask, dim = 1)
self.episode_length_buf += 1
self.base_pos[:] = self.robot.get_pos()
self.base_lin_vel[:] = self.robot.get_vel()
# Check reset (termination or truncation)
self.reset_buf = self.episode_length_buf > self.max_episode_length # truncation
self.reset_buf |= self.base_pos[:, 2] < self.healthy_z_range[0] # termination
self.reset_buf |= self.base_pos[:, 2] > self.healthy_z_range[1] # termination
self.reset_buf |= qvel_mask
# Check truncation (timeout)
timeout_idx = (self.episode_length_buf > self.max_episode_length).nonzero(as_tuple = False).flatten()
self.extras['time_outs'] = torch.zeros_like(self.reset_buf, device = self.device, dtype = gs.tc_float)
self.extras['time_outs'][timeout_idx] = 1.0
self.reset_idx(self.reset_buf.nonzero(as_tuple = False).flatten())
# Compute rewards
self.reward_buf[:] = 0.0
for name, reward_func in self.reward_functions.items():
reward = reward_func() * self.reward_scales[name]
self.reward_buf += reward
self.episode_sums[name] += reward
# Compute observation
self.observation_buf[:] = self.get_observations()
return self.observation_buf, None, self.reward_buf, self.reset_buf, self.extras
def get_observations(self, envs_idx = None):
qpos = self.robot.get_qpos(envs_idx = envs_idx)[:, 2:]
qvel = self.robot.get_dofs_velocity(envs_idx = envs_idx)
return torch.cat([qpos, qvel], dim = -1)
def get_privileged_observations(self):
return None
# reward functions
def _reward_linvel(self):
com_after = self.rigid_solver.get_links_COM([self.robot.base_link_idx]).squeeze(dim = 1)
com_vel = (com_after - self.com_before) / self.dt
return com_vel[:, 0]
def _reward_quadctrl(self):
return -torch.sum(torch.square(self.actions), dim = 1)
def _reward_alive(self):
return 1.0
if __name__ == '__main__':
gs.init(backend = gs.gpu)
env = HumanoidEnv()
# env = HumanoidEnv(headless = False, debug = True)
log_dir = 'logs/humanoid'
train_cfg = {
'algorithm': {
'clip_param': 0.2,
'desired_kl': 0.01,
'entropy_coef': 1e-4,
'gamma': 0.97,
'lam': 0.95,
'learning_rate': 0.0003,
'max_grad_norm': 10.0,
'num_learning_epochs': 8,
'num_mini_batches': 32,
'schedule': 'adaptive',
'use_clipped_value_loss': True,
'value_loss_coef': 1.0,
},
'init_member_classes': {},
'policy': {
'activation': 'relu',
'actor_hidden_dims': [512, 256, 128],
'critic_hidden_dims': [512, 256, 128],
'init_noise_std': 1.0,
},
'runner': {
'algorithm_class_name': 'PPO',
'checkpoint': -1,
'experiment_name': 'humanoid',
'load_run': -1,
'log_interval': 1,
'max_iterations': 2000,
'num_steps_per_env': 10,
'policy_class_name': 'ActorCritic',
'record_interval': 50,
'resume': False,
'resume_path': None,
'run_name': '',
'runner_class_name': 'runner_class_name',
'save_interval': 100,
},
'runner_class_name': 'OnPolicyRunner',
'seed': 1,
}
runner = OnPolicyRunner(env, train_cfg, log_dir = log_dir, device = 'cuda:0')
wandb.init(project = 'genesis', name = 'humanoid', dir = log_dir)
runner.learn(num_learning_iterations = 2000)
humanoid_genesis.xml
<mujoco model="humanoid">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="3" contype="1" material="geom" rgba="0.4 0.33 0.26 1.0"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<!-- Removed RK4 integrator for brax. -->
<option iterations="8" timestep="0.003"/>
<custom>
<!-- brax custom params -->
<numeric data="2500" name="constraint_limit_stiffness"/>
<numeric data="27000" name="constraint_stiffness"/>
<numeric data="30" name="constraint_ang_damping"/>
<numeric data="80" name="constraint_vel_damping"/>
<numeric data="-0.05" name="ang_damping"/>
<numeric data="0.5" name="joint_scale_pos"/>
<numeric data="0.1" name="joint_scale_ang"/>
<numeric data="0" name="spring_mass_scale"/>
<numeric data="1" name="spring_inertia_scale"/>
<numeric data="20" name="matrix_inv_iterations"/>
<numeric data="15" name="solver_maxls"/>
</custom>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<!-- <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/> -->
<!-- <geom conaffinity="1" condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" size="20 20 0.125" type="plane" rgba="0.5 0.5 0.5 1.0"/> -->
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 1.4">
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0 0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0 0 -0.45">
<geom contype="1" name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0 0 -0.45">
<geom contype="1" name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>
Expected Behavior
- Training should proceed without generating NaN values
- Value loss should remain stable or decrease over time
- Training should complete without interruption
- Training should work properly as it does in other physics simulators
- The same parameters used in Brax/MJX resulted in successful training, but fail in Genesis
Screenshots/Videos
Relevant log output
Environment
- OS: [e.g. Ubuntu 24.04, Windows 11 24H2] Ubuntu 20.04
- GPU/CPU [e.g. A100, RTX 4090, M3pr, Intel I9-9900k, Ryzen 5900x] (N/A if no GPU/CPU) RTX 2070 Super
- GPU-driver version (N/A if no GPU) : 535
- CUDA / CUDA-toolkit version (N/A if non-Nvidia) : 12.2
Release version or Commit ID
0.2.0
Additional Context
No response