stable-baselines3-contrib icon indicating copy to clipboard operation
stable-baselines3-contrib copied to clipboard

TQC: ep_len_mean and ep_rew_mean does not match real values

Open btabia opened this issue 1 month ago • 0 comments

🐛 Bug

Hello,

I am currently using TQC (sb3 contrib version 2.3.0/ sb3 version: 2.3.2) with a custom environment on gymnasium (version 0.28.1) and Isaac Sim as a simulator. I have figured out that the data that sb3 tqc displays (ep_len_mean and ep_rew_mean) does not match what my environment calculates. I have checked my environment several times,s and I can't find where the issue might come from.

You can find attached a screenshot showing the discrepancy. Screenshot from 2024-05-07 10-51-16 Screenshot from 2024-05-07 10-51-25

To Reproduce

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import os
from tasks.basetask import Basetask
from tasks.single_debris_rigid import SingleDebrisRigid
from tasks.single_debris_rigid_admitance import SingleDebrisRigidAdmitance

def task_selector(config):
    if config["task"]["name"] == "single_debris_rigid":
        return SingleDebrisRigid(config)
    elif config["task"]["name"] == "single_debris_rigid_admitance":
        return SingleDebrisRigidAdmitance(config)
    else: 
        return SingleDebrisRigid(config)


class Glovebox(gym.Env):
    metadata = {"render_modes": "human"}

    def __init__(self, render_mode=None, cfg_env=None, cfg_task=None,  world=None) -> None:
        self.render_mode = render_mode
        self.step_after_reset = int(1)
        self.current_step = 0
        self.episode_reward = 0
        self.world = world
        self.cfg = cfg_env
        self.prev_time = 0
        self.task = task_selector(cfg_task)
        #
        self._num_actions = self.cfg["env"]["num_actions"]
        # number of observations depends on the number of debris on this version
        #self.seed(seed)
        self.reward_range = (-float("inf"), float("inf"))

        self.action_space = spaces.Box(
                                    low = -1.0,
                                    high = 1.0,
                                    shape = (self._num_actions,),
                                    dtype= np.float32,
                            )
        
        observation_map = {
            "ik_control" : 15,
            "Small" : 19,
            "Medium" : 27,
            "Large": 44,
            "Xlarge": 41,
        }
        self.task.num_observations = self.cfg["env"]["num_observations"]
        self.observation_space = spaces.Box(
                                        low = -float("inf"),
                                        high = float("inf"),
                                        shape = (int(observation_map[self.cfg["env"]["num_observations"]]),),
                                        dtype= np.float32,
                                        )  
        
        self._max_episode_length = self.cfg["env"]["max_episode_length"]
        gym.Env.__init__(self)
        self.set_up_env()
        self.task.set_up_task(self.world)



    # set up the main component of the environment such as the glovebox and the robot
    def set_up_env(self):
        self.include_glovebox(self.world)
        print("--- Glovebox Environment added --")
        return

    def include_glovebox(self, world):
        # include all the glovebox environment with the robot included
        # Only one robot is managed
        import omni.isaac.core.utils.stage as stage_utils
        glovebox_usd_path = self.cfg["env"]["glovebox_usd_path"]
        stage_utils.add_reference_to_stage(usd_path=glovebox_usd_path, prim_path = "/World")

    def get_dt(self):
        return self._dt

    def get_observations(self):
        self.world.render()
        obs =  self.task.get_observations()
        return obs
    
    def get_info(self):
        return 0
    
    def seed(self):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        np.random.seed(seed)
        return [seed]
    
    def close(self, seed=None, options=None):
        super().close()
        return
    
    def compute_reward(self):
        rewards = self.task.calculate_metrics()
        return rewards
    
    def reset(self, seed=None, options=None):
        self.current_step = 0   
        self.episode_reward = 0
        self.world.reset()
        super().reset(seed=seed)
        self.task.reset()
        obs = self.get_observations()
        info = {}
        return obs, info
    
    def step(self, action):
        self.current_step = self.current_step + 1
        self.task.perform_action(action)
        for i in range(self.cfg["env"]["skipframe"]):
            self.world.step(render=False)
        
        observations = self.get_observations()
        
        info = {}
        terminated = False
        truncated = False

        if self.current_step >= self._max_episode_length:
            terminated = True
            truncated = True
            print("Truncation")
            print("Number of Timestep : " + str(self.current_step))
            print("max episode length: " + str(self._max_episode_length))
            

        terminated = terminated or self.task.is_done(terminated)
        rewards = self.compute_reward()
        self.episode_reward = self.episode_reward + rewards
        if terminated == True: 
            print("Total episode reward: " + str(self.episode_reward))
            self.episode_reward = 0
        return observations, rewards, terminated, truncated, info

Relevant log output / Error message

No response

System Info

GPU: RTX 4090 Cuda 12.2

Package Version Location


absl-py 2.1.0 aiobotocore 1.2.0 aiodns 2.0.0 aiofiles 0.4.0 aiohttp 3.8.3 aioitertools 0.7.1 aiosignal 1.3.1 antlr4-python3-runtime 4.9.3 anyio 3.7.1 appdirs 1.4.4 asteval 0.9.21 astunparse 1.6.3 async-timeout 4.0.2 attrs 20.1.0 azure-core 1.28.0 azure-identity 1.13.0 azure-storage-blob 12.17.0 boto3 1.26.63 botocore 1.20.49 cchardet 2.1.6 certifi 2023.5.7 cffi 1.15.1 charset-normalizer 2.1.1 click 8.1.3 cloudpickle 3.0.0 cmake 3.29.0.1 construct 2.10.68 contourpy 1.2.1 coverage 6.1.2 cryptography 41.0.6 cycler 0.11.0 docker-pycreds 0.4.0 exceptiongroup 1.1.2 Farama-Notifications 0.0.4 fastapi 0.92.0 filelock 3.12.2 flatbuffers 24.3.25 fonttools 4.51.0 frozenlist 1.3.3 fsspec 2024.3.1 gast 0.5.4 gitdb 4.0.11 GitPython 3.1.43 Glovebox 0.0.1 /home/btabia/git/residual_soft_push/envs google-pasta 0.2.0 grpcio 1.62.1 gunicorn 20.1.0 gymnasium 0.28.1 h11 0.14.0 h5py 3.10.0 httptools 0.4.0 hydra-core 1.3.2 idna 3.4 idna-ssl 1.1.0 imageio 2.22.2 isodate 0.6.1 jax-jumpy 1.0.0 Jinja2 3.1.2 jmespath 0.10.0 jsonschema 3.2.0 keras 3.2.0 kiwisolver 1.4.4 libclang 18.1.1 lit 18.1.2 llvmlite 0.40.0 lxml 4.9.3 Markdown 3.6 markdown-it-py 3.0.0 MarkupSafe 2.1.3 matplotlib 3.7.1 mdurl 0.1.2 ml-dtypes 0.3.2 mpmath 1.3.0 msal 1.23.0 msal-extensions 1.0.0 multidict 6.0.4 namex 0.0.7 nest-asyncio 1.5.6 networkx 3.1 numba 0.57.0 numpy 1.23.5 numpy-quaternion 2022.4.3 nvidia-cublas-cu11 11.10.3.66 nvidia-cublas-cu12 12.1.3.1 nvidia-cuda-cupti-cu11 11.7.101 nvidia-cuda-cupti-cu12 12.1.105 nvidia-cuda-nvrtc-cu11 11.7.99 nvidia-cuda-nvrtc-cu12 12.1.105 nvidia-cuda-runtime-cu11 11.7.99 nvidia-cuda-runtime-cu12 12.1.105 nvidia-cudnn-cu11 8.5.0.96 nvidia-cudnn-cu12 8.9.2.26 nvidia-cufft-cu11 10.9.0.58 nvidia-cufft-cu12 11.0.2.54 nvidia-curand-cu11 10.2.10.91 nvidia-curand-cu12 10.3.2.106 nvidia-cusolver-cu11 11.4.0.1 nvidia-cusolver-cu12 11.4.5.107 nvidia-cusparse-cu11 11.7.4.91 nvidia-cusparse-cu12 12.1.0.106 nvidia-lula-no-cuda 0.9.1 nvidia-nccl-cu11 2.14.3 nvidia-nccl-cu12 2.20.5 nvidia-nvjitlink-cu12 12.4.127 nvidia-nvtx-cu11 11.7.91 nvidia-nvtx-cu12 12.1.105 nvidia-srl-base 0.9.0 nvidia-srl-math 0.8.0 nvidia-srl-usd 0.13.0 nvidia-srl-usd-to-urdf 0.5.0 nvsmi 0.4.2 oauthlib 3.2.2 omegaconf 2.3.0 opt-einsum 3.3.0 optree 0.11.0 osqp 0.6.2.post8 packaging 23.0 pandas 2.2.1 pathtools 0.1.2 Pillow 9.2.0 Pint 0.20.1 pip 21.2.1+nv1 plotly 5.3.1 portalocker 2.7.0 protobuf 4.25.3 psutil 5.7.2 pycares 3.1.1 pycparser 2.21 pydantic 1.9.2 Pygments 2.17.2 pyparsing 3.0.9 pyperclip 1.8.0 pypng 0.20220715.0 pyrsistent 0.19.3 python-dateutil 2.8.2 python-multipart 0.0.6 pytz 2022.7.1 PyYAML 6.0.1 qdldl 0.1.5.post3 qrcode 7.4.2 requests 2.31.0 requests-oauthlib 1.3.1 rich 13.7.1 s3transfer 0.6.1 sb3_contrib 2.3.0 scipy 1.10.1 selenium 4.14.0 sentry-sdk 1.14.0 setproctitle 1.3.3 setuptools 68.0.0 setuptools-scm 8.0.4 six 1.16.0 smmap 5.0.1 sniffio 1.3.0 stable_baselines3 2.3.2 starlette 0.25.0 sympy 1.12 tensorboard 2.16.2 tensorboard-data-server 0.7.2 tensorflow-io-gcs-filesystem 0.36.0 termcolor 2.4.0 toml 0.10.1 tomli 2.0.1 torch 2.0.1 torchvision 0.15.2+cu118 tornado 6.2 triton 2.0.0 typing_extensions 4.11.0 tzdata 2024.1 urllib3 1.26.16 uvicorn 0.21.1 wandb 0.16.6 watchdog 0.10.4 webbot 0.34 websockets 10.3 Werkzeug 3.0.2 wheel 0.43.0 wrapt 1.10.10 yarl 1.8.2

Checklist

btabia avatar May 07 '24 10:05 btabia