stable-baselines3-contrib
stable-baselines3-contrib copied to clipboard
TQC: ep_len_mean and ep_rew_mean do not match real values
🐛 Bug
Hello,
I am currently using TQC (sb3-contrib version 2.3.0 / sb3 version 2.3.2) with a custom environment on gymnasium (version 0.28.1) and Isaac Sim as a simulator. I have found that the statistics SB3's TQC displays (ep_len_mean and ep_rew_mean) do not match what my environment calculates. I have checked my environment several times, and I can't find where the issue might come from.
You can find attached a screenshot showing the discrepancy.
To Reproduce
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import os
from tasks.basetask import Basetask
from tasks.single_debris_rigid import SingleDebrisRigid
from tasks.single_debris_rigid_admitance import SingleDebrisRigidAdmitance
def task_selector(config):
    """Build the task object named by ``config["task"]["name"]``.

    ``"single_debris_rigid_admitance"`` selects ``SingleDebrisRigidAdmitance``;
    every other name (including ``"single_debris_rigid"``) selects
    ``SingleDebrisRigid``. The full ``config`` is handed to the task
    constructor.
    """
    task_name = config["task"]["name"]
    if task_name == "single_debris_rigid_admitance":
        return SingleDebrisRigidAdmitance(config)
    # "single_debris_rigid" and any unrecognised name share the same task.
    return SingleDebrisRigid(config)
class Glovebox(gym.Env):
    """Gymnasium environment that drives a glovebox scene through a task object.

    The environment owns a simulation ``world`` (stepped, reset and rendered
    here) and a ``task`` built by ``task_selector``. The task applies actions
    and supplies observations, rewards and the done condition.
    """

    # Gymnasium documents ``render_modes`` as a list of supported mode names.
    metadata = {"render_modes": ["human"]}

    def __init__(self, render_mode=None, cfg_env=None, cfg_task=None, world=None) -> None:
        """Create the spaces, load the glovebox stage and set up the task.

        Args:
            render_mode: Stored on the instance as ``self.render_mode``.
            cfg_env: Mapping with an ``"env"`` section. This class reads
                ``num_actions``, ``num_observations``, ``max_episode_length``,
                ``glovebox_usd_path`` and ``skipframe`` from it.
            cfg_task: Configuration handed to ``task_selector``.
            world: Simulation world; ``reset()``, ``step(render=...)`` and
                ``render()`` are called on it.
        """
        self.render_mode = render_mode
        self.step_after_reset = 1
        self.current_step = 0
        self.episode_reward = 0
        self.world = world
        self.cfg = cfg_env
        self.prev_time = 0
        self.task = task_selector(cfg_task)

        self._num_actions = self.cfg["env"]["num_actions"]
        self.reward_range = (-float("inf"), float("inf"))
        self.action_space = spaces.Box(
            low=-1.0,
            high=1.0,
            shape=(self._num_actions,),
            dtype=np.float32,
        )

        # The number of observations depends on the number of debris in this
        # version; the config stores a size label that is mapped to a length.
        observation_map = {
            "ik_control": 15,
            "Small": 19,
            "Medium": 27,
            "Large": 44,
            "Xlarge": 41,
        }
        observation_key = self.cfg["env"]["num_observations"]
        # The task receives the label itself, not the mapped integer.
        self.task.num_observations = observation_key
        self.observation_space = spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=(int(observation_map[observation_key]),),
            dtype=np.float32,
        )
        self._max_episode_length = self.cfg["env"]["max_episode_length"]

        super().__init__()
        self.set_up_env()
        self.task.set_up_task(self.world)

    def set_up_env(self):
        """Add the main scene components (glovebox and robot) to the world."""
        self.include_glovebox(self.world)
        print("--- Glovebox Environment added --")

    def include_glovebox(self, world):
        """Reference the glovebox USD file (robot included) under ``/World``.

        Only one robot is managed. ``world`` is accepted but not used here.
        """
        # Imported locally, as in the original snippet.
        # NOTE(review): presumably because omni modules are only importable
        # once the simulator is running -- confirm.
        import omni.isaac.core.utils.stage as stage_utils

        glovebox_usd_path = self.cfg["env"]["glovebox_usd_path"]
        stage_utils.add_reference_to_stage(usd_path=glovebox_usd_path, prim_path="/World")

    def get_dt(self):
        """Return ``self._dt``.

        NOTE(review): ``_dt`` is never assigned in this class, so this raises
        ``AttributeError`` unless something else sets it -- confirm.
        """
        return self._dt

    def get_observations(self):
        """Render the world, then return the task's current observations."""
        self.world.render()
        return self.task.get_observations()

    def get_info(self):
        """Return the placeholder value ``0``.

        NOTE(review): ``reset`` and ``step`` build their own empty ``info``
        dicts and do not call this method.
        """
        return 0

    def seed(self, seed=None):
        """Seed the environment RNG and NumPy's global RNG.

        Args:
            seed: Seed to use; ``None`` lets gymnasium pick one.

        Returns:
            A one-element list holding the seed that was actually used.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        # ``np.random.seed`` only accepts values in [0, 2**32 - 1], while the
        # seed chosen by gymnasium for ``None`` can be far larger.
        np.random.seed(seed % 2**32)
        return [seed]

    def close(self, seed=None, options=None):
        """Close the environment; ``seed`` and ``options`` are ignored."""
        super().close()

    def compute_reward(self):
        """Return the reward computed by the task for the current state."""
        return self.task.calculate_metrics()

    def reset(self, seed=None, options=None):
        """Reset counters, world and task, then return ``(obs, info)``."""
        self.current_step = 0
        self.episode_reward = 0
        self.world.reset()
        super().reset(seed=seed)
        self.task.reset()
        obs = self.get_observations()
        return obs, {}

    def step(self, action):
        """Apply ``action``, advance the simulation and report the transition.

        The world is stepped ``cfg["env"]["skipframe"]`` times without
        rendering for each call.

        Returns:
            ``(observations, rewards, terminated, truncated, info)``.
            ``truncated`` is set when the step counter reaches the maximum
            episode length; ``terminated`` reflects only what the task
            reports, so a time limit is no longer flagged as a terminal state.
        """
        self.current_step += 1
        self.task.perform_action(action)
        for _ in range(self.cfg["env"]["skipframe"]):
            self.world.step(render=False)
        observations = self.get_observations()
        info = {}

        truncated = self.current_step >= self._max_episode_length
        if truncated:
            print("Truncation")
            print(f"Number of Timestep : {self.current_step}")
            print(f"max episode length: {self._max_episode_length}")

        # The task still receives the time-limit flag, as before.
        # NOTE(review): if ``is_done`` echoes its argument back, ``terminated``
        # will still be True at the time limit -- confirm against the task.
        terminated = bool(self.task.is_done(truncated))

        rewards = self.compute_reward()
        self.episode_reward = self.episode_reward + rewards
        if terminated or truncated:
            # Report and clear the running total on either end-of-episode signal.
            print(f"Total episode reward: {self.episode_reward}")
            self.episode_reward = 0
        return observations, rewards, terminated, truncated, info
Relevant log output / Error message
No response
System Info
GPU: RTX 4090 Cuda 12.2
Package Version Location
absl-py 2.1.0 aiobotocore 1.2.0 aiodns 2.0.0 aiofiles 0.4.0 aiohttp 3.8.3 aioitertools 0.7.1 aiosignal 1.3.1 antlr4-python3-runtime 4.9.3 anyio 3.7.1 appdirs 1.4.4 asteval 0.9.21 astunparse 1.6.3 async-timeout 4.0.2 attrs 20.1.0 azure-core 1.28.0 azure-identity 1.13.0 azure-storage-blob 12.17.0 boto3 1.26.63 botocore 1.20.49 cchardet 2.1.6 certifi 2023.5.7 cffi 1.15.1 charset-normalizer 2.1.1 click 8.1.3 cloudpickle 3.0.0 cmake 3.29.0.1 construct 2.10.68 contourpy 1.2.1 coverage 6.1.2 cryptography 41.0.6 cycler 0.11.0 docker-pycreds 0.4.0 exceptiongroup 1.1.2 Farama-Notifications 0.0.4 fastapi 0.92.0 filelock 3.12.2 flatbuffers 24.3.25 fonttools 4.51.0 frozenlist 1.3.3 fsspec 2024.3.1 gast 0.5.4 gitdb 4.0.11 GitPython 3.1.43 Glovebox 0.0.1 /home/btabia/git/residual_soft_push/envs google-pasta 0.2.0 grpcio 1.62.1 gunicorn 20.1.0 gymnasium 0.28.1 h11 0.14.0 h5py 3.10.0 httptools 0.4.0 hydra-core 1.3.2 idna 3.4 idna-ssl 1.1.0 imageio 2.22.2 isodate 0.6.1 jax-jumpy 1.0.0 Jinja2 3.1.2 jmespath 0.10.0 jsonschema 3.2.0 keras 3.2.0 kiwisolver 1.4.4 libclang 18.1.1 lit 18.1.2 llvmlite 0.40.0 lxml 4.9.3 Markdown 3.6 markdown-it-py 3.0.0 MarkupSafe 2.1.3 matplotlib 3.7.1 mdurl 0.1.2 ml-dtypes 0.3.2 mpmath 1.3.0 msal 1.23.0 msal-extensions 1.0.0 multidict 6.0.4 namex 0.0.7 nest-asyncio 1.5.6 networkx 3.1 numba 0.57.0 numpy 1.23.5 numpy-quaternion 2022.4.3 nvidia-cublas-cu11 11.10.3.66 nvidia-cublas-cu12 12.1.3.1 nvidia-cuda-cupti-cu11 11.7.101 nvidia-cuda-cupti-cu12 12.1.105 nvidia-cuda-nvrtc-cu11 11.7.99 nvidia-cuda-nvrtc-cu12 12.1.105 nvidia-cuda-runtime-cu11 11.7.99 nvidia-cuda-runtime-cu12 12.1.105 nvidia-cudnn-cu11 8.5.0.96 nvidia-cudnn-cu12 8.9.2.26 nvidia-cufft-cu11 10.9.0.58 nvidia-cufft-cu12 11.0.2.54 nvidia-curand-cu11 10.2.10.91 nvidia-curand-cu12 10.3.2.106 nvidia-cusolver-cu11 11.4.0.1 nvidia-cusolver-cu12 11.4.5.107 nvidia-cusparse-cu11 11.7.4.91 nvidia-cusparse-cu12 12.1.0.106 nvidia-lula-no-cuda 0.9.1 nvidia-nccl-cu11 2.14.3 nvidia-nccl-cu12 2.20.5 
nvidia-nvjitlink-cu12 12.4.127 nvidia-nvtx-cu11 11.7.91 nvidia-nvtx-cu12 12.1.105 nvidia-srl-base 0.9.0 nvidia-srl-math 0.8.0 nvidia-srl-usd 0.13.0 nvidia-srl-usd-to-urdf 0.5.0 nvsmi 0.4.2 oauthlib 3.2.2 omegaconf 2.3.0 opt-einsum 3.3.0 optree 0.11.0 osqp 0.6.2.post8 packaging 23.0 pandas 2.2.1 pathtools 0.1.2 Pillow 9.2.0 Pint 0.20.1 pip 21.2.1+nv1 plotly 5.3.1 portalocker 2.7.0 protobuf 4.25.3 psutil 5.7.2 pycares 3.1.1 pycparser 2.21 pydantic 1.9.2 Pygments 2.17.2 pyparsing 3.0.9 pyperclip 1.8.0 pypng 0.20220715.0 pyrsistent 0.19.3 python-dateutil 2.8.2 python-multipart 0.0.6 pytz 2022.7.1 PyYAML 6.0.1 qdldl 0.1.5.post3 qrcode 7.4.2 requests 2.31.0 requests-oauthlib 1.3.1 rich 13.7.1 s3transfer 0.6.1 sb3_contrib 2.3.0 scipy 1.10.1 selenium 4.14.0 sentry-sdk 1.14.0 setproctitle 1.3.3 setuptools 68.0.0 setuptools-scm 8.0.4 six 1.16.0 smmap 5.0.1 sniffio 1.3.0 stable_baselines3 2.3.2 starlette 0.25.0 sympy 1.12 tensorboard 2.16.2 tensorboard-data-server 0.7.2 tensorflow-io-gcs-filesystem 0.36.0 termcolor 2.4.0 toml 0.10.1 tomli 2.0.1 torch 2.0.1 torchvision 0.15.2+cu118 tornado 6.2 triton 2.0.0 typing_extensions 4.11.0 tzdata 2024.1 urllib3 1.26.16 uvicorn 0.21.1 wandb 0.16.6 watchdog 0.10.4 webbot 0.34 websockets 10.3 Werkzeug 3.0.2 wheel 0.43.0 wrapt 1.10.10 yarl 1.8.2
Checklist
- [X] I have checked that there is no similar issue in the repo
- [X] I have read the documentation
- [X] I have provided a minimal and working example to reproduce the bug
- [X] I've used the markdown code blocks for both code and stack traces.