[BUG] ValueError during training procedure
Hi. I've been trying to reproduce the results of the Meta-World paper (Figure 11 in the paper) with RLlib.
Sometimes during training, a ValueError occurs. The error message looks like this:
ValueError: ('Observation ({} dtype={}) outside given space ({})!', array([ 0.54684764, 0.44018602, 0.5549992 , 0.3511533 , -0.0165956 ,
0.57203245, 0.01993605, 0. , 0. , 0. ,
1. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0.5476604 , 0.44060594,
0.55454004, 0.34979662, -0.0165956 , 0.57203245, 0.01993605,
0. , 0. , 0. , 1. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. ], dtype=float32), dtype('float32'), Box([-0.525 0.348 -0.0525 -1. -inf -inf -inf -inf -inf
-inf -inf -inf -inf -inf -inf -inf -inf -inf
-0.525 0.348 -0.0525 -1. -inf -inf -inf -inf -inf
-inf -inf -inf -inf -inf -inf -inf -inf -inf
0. 0. 0. ], [0.525 1.025 0.7 1. inf inf inf inf inf inf inf inf
inf inf inf inf inf inf 0.525 1.025 0.7 1. inf inf
inf inf inf inf inf inf inf inf inf inf inf inf
0. 0. 0. ], (39,), float32))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/rllib/evaluation/worker_set.py", line 725, in _worker_health_check
ray.get(obj_ref)
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/_private/worker.py", line 2275, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(StopIteration): ray::RolloutWorker.sample_with_count() (pid=64294, ip=163.152.162.213, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f3032a94b50>)
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 866, in sample_with_count
batch = self.sample()
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 806, in sample
batches = [self.input_reader.next()]
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
batches = [self.get_data()]
File "/opt/anaconda3/envs/metarl2/lib/python3.9/site-packages/ray/rllib/evaluation/sampler.py", line 282, in get_data
item = next(self._env_runner)
StopIteration
How can I fix this error? Looking at the message, the first observation element (0.5468...) is slightly above the declared upper bound (0.525), so the hand position seems to drift just outside the declared observation space. I think this error is related to this issue.
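As a stopgap, I'm thinking about wrapping each env so its declared observation space is unbounded, which should keep RLlib's space check from firing on these slightly out-of-range values. This is an untested sketch (UnboundedObs is my own name, not a Metaworld or RLlib class); clipping observations to the original bounds would be an alternative:

import gym
import numpy as np

class UnboundedObs(gym.ObservationWrapper):
    # Widen the declared space so hand positions that drift slightly past
    # the original Box bounds no longer violate it.
    def __init__(self, env):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=env.observation_space.shape, dtype=np.float32)

    def observation(self, obs):
        return obs.astype(np.float32)

# Usage: in env_creator_hidden / env_creator_observable below,
# return UnboundedObs(env) instead of env.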
[Dependencies]
python==3.9.7 torch==1.11.0+cu113 mujoco-py==2.1.2.14 mujoco==2.1.0 ray==2.0.0
[Code]
import metaworld
import os
import random
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import ray
from ray.tune.registry import register_env
from ray.rllib.agents.ppo import PPOTrainer, PPOConfig
from ray.tune.logger import pretty_print
from custom_metric_callback import MyCallbacks
from metaworld.envs import (ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE,
                            ALL_V2_ENVIRONMENTS_GOAL_HIDDEN)
hidden_env_names = ['assembly-v2-goal-hidden', 'basketball-v2-goal-hidden', 'bin-picking-v2-goal-hidden', 'box-close-v2-goal-hidden',
'button-press-topdown-v2-goal-hidden', 'button-press-topdown-wall-v2-goal-hidden',
'button-press-v2-goal-hidden', 'button-press-wall-v2-goal-hidden', 'coffee-button-v2-goal-hidden',
'coffee-pull-v2-goal-hidden', 'coffee-push-v2-goal-hidden', 'dial-turn-v2-goal-hidden',
'disassemble-v2-goal-hidden', 'door-close-v2-goal-hidden', 'door-lock-v2-goal-hidden', 'door-open-v2-goal-hidden',
'door-unlock-v2-goal-hidden', 'hand-insert-v2-goal-hidden', 'drawer-close-v2-goal-hidden',
'drawer-open-v2-goal-hidden', 'faucet-open-v2-goal-hidden', 'faucet-close-v2-goal-hidden', 'hammer-v2-goal-hidden',
'handle-press-side-v2-goal-hidden', 'handle-press-v2-goal-hidden', 'handle-pull-side-v2-goal-hidden',
'handle-pull-v2-goal-hidden', 'lever-pull-v2-goal-hidden', 'peg-insert-side-v2-goal-hidden',
'pick-place-wall-v2-goal-hidden', 'pick-out-of-hole-v2-goal-hidden', 'reach-v2-goal-hidden',
'push-back-v2-goal-hidden', 'push-v2-goal-hidden', 'pick-place-v2-goal-hidden', 'plate-slide-v2-goal-hidden',
'plate-slide-side-v2-goal-hidden', 'plate-slide-back-v2-goal-hidden',
'plate-slide-back-side-v2-goal-hidden', 'peg-unplug-side-v2-goal-hidden', 'soccer-v2-goal-hidden',
'stick-push-v2-goal-hidden', 'stick-pull-v2-goal-hidden', 'push-wall-v2-goal-hidden', 'reach-wall-v2-goal-hidden',
'shelf-place-v2-goal-hidden', 'sweep-into-v2-goal-hidden', 'sweep-v2-goal-hidden', 'window-open-v2-goal-hidden',
'window-close-v2-goal-hidden']
observable_env_names = ['assembly-v2-goal-observable', 'basketball-v2-goal-observable', 'bin-picking-v2-goal-observable', 'box-close-v2-goal-observable',
'button-press-topdown-v2-goal-observable', 'button-press-topdown-wall-v2-goal-observable',
'button-press-v2-goal-observable', 'button-press-wall-v2-goal-observable', 'coffee-button-v2-goal-observable',
'coffee-pull-v2-goal-observable', 'coffee-push-v2-goal-observable', 'dial-turn-v2-goal-observable',
'disassemble-v2-goal-observable', 'door-close-v2-goal-observable', 'door-lock-v2-goal-observable', 'door-open-v2-goal-observable',
'door-unlock-v2-goal-observable', 'hand-insert-v2-goal-observable', 'drawer-close-v2-goal-observable',
'drawer-open-v2-goal-observable', 'faucet-open-v2-goal-observable', 'faucet-close-v2-goal-observable', 'hammer-v2-goal-observable',
'handle-press-side-v2-goal-observable', 'handle-press-v2-goal-observable', 'handle-pull-side-v2-goal-observable',
'handle-pull-v2-goal-observable', 'lever-pull-v2-goal-observable', 'peg-insert-side-v2-goal-observable',
'pick-place-wall-v2-goal-observable', 'pick-out-of-hole-v2-goal-observable', 'reach-v2-goal-observable',
'push-back-v2-goal-observable', 'push-v2-goal-observable', 'pick-place-v2-goal-observable', 'plate-slide-v2-goal-observable',
'plate-slide-side-v2-goal-observable', 'plate-slide-back-v2-goal-observable',
'plate-slide-back-side-v2-goal-observable', 'peg-unplug-side-v2-goal-observable', 'soccer-v2-goal-observable',
'stick-push-v2-goal-observable', 'stick-pull-v2-goal-observable', 'push-wall-v2-goal-observable', 'reach-wall-v2-goal-observable',
'shelf-place-v2-goal-observable', 'sweep-into-v2-goal-observable', 'sweep-v2-goal-observable', 'window-open-v2-goal-observable',
'window-close-v2-goal-observable']
def env_creator_hidden(env_config):
    env_name = env_config["env"]
    SEED = env_config["seed"]
    env_cls = ALL_V2_ENVIRONMENTS_GOAL_HIDDEN[env_name]
    env = env_cls(seed=SEED)
    env.seed(SEED)
    random.seed(SEED)
    return env

def env_creator_observable(env_config):
    env_name = env_config["env"]
    SEED = env_config["seed"]
    env_cls = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_name]
    env = env_cls(seed=SEED)
    env.seed(SEED)
    random.seed(SEED)
    return env
for env_name in hidden_env_names:
    register_env(env_name, env_creator_hidden)
for env_name in observable_env_names:
    register_env(env_name, env_creator_observable)
num_gpus = 8
num_envs = len(observable_env_names)
gpu_fractions = num_gpus / num_envs
@ray.remote(num_cpus=2, num_gpus=gpu_fractions)
def distributed_trainer(env_name):
    config = PPOConfig()
    config.training(
        gamma=0.99,
        lr=0.0005,
        train_batch_size=2000,
        model={
            "fcnet_hiddens": [128, 128],
            "fcnet_activation": "tanh",
        },
        use_gae=True,
        lambda_=0.95,
        vf_loss_coeff=0.2,
        entropy_coeff=0.001,
        num_sgd_iter=5,
        sgd_minibatch_size=64,
        shuffle_sequences=True,
    ).resources(
        num_gpus=1,
        num_cpus_per_worker=1,
    ).framework(
        framework='torch'
    ).environment(
        env=env_name,
        env_config={"env": env_name, "seed": 1}
    ).rollouts(
        num_rollout_workers=2,
        num_envs_per_worker=1,
        create_env_on_local_worker=False,
        rollout_fragment_length=250,
        horizon=500,
        soft_horizon=False,
        no_done_at_end=False,
        ignore_worker_failures=True,
        recreate_failed_workers=True,
        restart_failed_sub_environments=True,
    )
    # .callbacks(MyCallbacks)
    trainer = PPOTrainer(env=env_name, config=config)
    print(f"env_name: {env_name}")
    print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()))
    print("CUDA_VISIBLE_DEVICES: {}".format(
        os.environ["CUDA_VISIBLE_DEVICES"]))
    for epoch in range(10000):
        result = trainer.train()
        result.pop('info')
        result.pop('sampler_results')
        if epoch % 200 == 0:
            custom_metrics = result["custom_metrics"]
            print(
                f"env_name: {env_name}, epoch: {epoch}, \n custom_metrics: {custom_metrics}")
            print(pretty_print(result))
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
    return 0
distributed_trainer_refs = [distributed_trainer.remote(env_name) for env_name in hidden_env_names]
results = ray.get(distributed_trainer_refs)
distributed_trainer_refs = [distributed_trainer.remote(env_name) for env_name in observable_env_names]
results = ray.get(distributed_trainer_refs)