ray
ray copied to clipboard
RLlib - Multiagent new api - rllib-multi-agent-env-v0 already in registry
What happened + What you expected to happen
I converted existing code that worked on Ray 2.7 to Ray 2.20 (new API stack).
The error: File "/opt/project/trading/training/model/rl/multi_agent/ppo/equity/trainer.py", line 125, in start_training_equity algo: Algorithm = config.build() File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm_config.py", line 859, in build return algo_class( File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 554, in init super().init( File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 158, in init self.setup(copy.deepcopy(self.config)) File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 670, in setup self.evaluation_workers: EnvRunnerGroup = EnvRunnerGroup( File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 169, in init self._setup( File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 260, in _setup self._local_worker = self._make_worker( File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 1108, in _make_worker worker = cls( File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 73, in init self.make_env() File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 726, in make_env gym.register( File "/usr/local/lib/python3.10/site-packages/gymnasium/envs/registration.py", line 693, in register logger.warn(f"Overriding environment {new_spec.id} already in registry.") File "/usr/local/lib/python3.10/site-packages/gymnasium/logger.py", line 55, in warn warnings.warn( UserWarning: WARN: Overriding environment rllib-multi-agent-env-v0 already in registry.
Versions / Dependencies
Ray 2.20
Reproduction script
def env_creator(cfg):
    """Env factory passed to `register_env`.

    Wraps the raw config dict in an `EnvContext` (worker_index pinned to 0)
    before handing it to the custom multi-agent environment.
    """
    context = EnvContext(env_config=cfg, worker_index=0)
    return FinancialPortfolioEnv(env_config=context)
# Register the custom env factory under the id referenced by .environment() below.
register_env('FinancialPortfolio-Equity-v0', env_creator)
# Build a PPO config on RLlib's new API stack (RLModule + EnvRunner/ConnectorV2).
config = (
PPOConfig()
# Explicitly opt in to the new API stack.
.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
.environment(env="FinancialPortfolio-Equity-v0", env_config=env_cfg)
.training(
train_batch_size=256,
gamma=0.99,
lr=0.001,
lambda_=0.95,
# sgd_minibatch_size=64,
clip_param=0.2,
# vf_clip_param = 10.0,
kl_target=0.005,
kl_coeff=0.5,
entropy_coeff=0.01,
vf_loss_coeff=0.5,
# NOTE(review): old-stack `model` dict; presumably ignored once the RLModule API
# is enabled (the `model_config_dict` in .rl_module() below takes over) -- confirm
# against the RLlib 2.20 docs. Also: the hidden sizes here ([1024, ...]) disagree
# with the [512, ...] layout declared in the rl_module spec below.
model={
"uses_new_env_runners": True,
"use_lstm": False,
"fcnet_hiddens": [1024, 512, 256, 128, 64, 32],
"fcnet_activation": "relu",
},
)
.framework(framework="torch")
.resources(
num_learner_workers=0, # <- in most cases, set this value to the number of GPUs
num_gpus_per_learner_worker=0, # <- set this to 1, if you have at least 1 GPU
num_cpus_for_local_worker=1,
)
# New-stack rollout workers: one multi-agent EnvRunner, no remote runners.
.env_runners(env_runner_cls=MultiAgentEnvRunner, num_env_runners=0, num_envs_per_env_runner=1,
preprocessor_pref=None)
# NOTE(review): evaluation_num_env_runners=-1 looks suspicious (docs use >= 0);
# the reported "rllib-multi-agent-env-v0 already in registry" warning is raised
# while these evaluation runners re-register the env -- verify this setting.
.evaluation(evaluation_interval=1, evaluation_duration=5, evaluation_duration_unit="episodes",
evaluation_num_env_runners=-1)
.debugging(log_level="INFO", logger_config={"type": PrintLogger, "prefix": "trading-equity"})
.multi_agent(
count_steps_by="env_steps",
policies=get_policies_to_be_trained(),
policy_mapping_fn=policy_mapping_fn,
policies_to_train=get_policies_to_be_trained()
# NOTE(review): `_enable_rl_module_api` duplicates the .api_stack() toggle above;
# presumably redundant (and deprecated-style, leading underscore) on 2.20 -- confirm.
).rl_module(
_enable_rl_module_api=True,
model_config_dict={
"fcnet_activation": "relu",
"fcnet_hiddens": [512, 256, 128, 64, 32],
"uses_new_env_runners": True,
},
rl_module_spec=MultiAgentRLModuleSpec(
module_specs={
# One spec per policy id; all policies share the same spaces and net layout.
p: SingleAgentRLModuleSpec(
module_class=PPOTorchRLModule,
action_space=gym.spaces.Discrete(
n=env_cfg['info']['parameters']['environment']['action']['n'],
start=env_cfg['info']['parameters']['environment']['action']['start']),
observation_space=gym.spaces.Box(low=0, high=1, shape=(len(features),)),
model_config_dict={
"fcnet_activation": "relu",
"fcnet_hiddens": [512, 256, 128, 64, 32]},
catalog_class=PPOCatalog,
) for p in get_policies()},
),
)
)
### Issue Severity
High: It blocks me from completing my task.
When evaluation is disabled, training proceeds further, but a new error appears:
File "/opt/project/trading/training/model/rl/multi_agent/ppo/equity/trainer.py", line 129, in start_training_equity result = algo.train() File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 331, in train raise skipped from exception_cause(skipped) File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 328, in train result = self.step() File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 873, in step train_results, train_iter_ctx = self._run_one_training_iteration() File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 3156, in _run_one_training_iteration results = self.training_step() File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 424, in training_step return self._training_step_new_api_stack() File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 445, in _training_step_new_api_stack episodes, env_runner_results = synchronous_parallel_sample( File "/usr/local/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 94, in synchronous_parallel_sample stats_dicts = [worker_set.local_worker().get_metrics()] File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 615, in get_metrics return self.metrics.reduce() File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/metrics_logger.py", line 750, in reduce self.stats[sub_key] = stat.reduce() File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/stats.py", line 274, in reduce self.values = self._reduced_values()[1] File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/stats.py", line 539, in _reduced_values reduced = reduce_meth(values) File "/usr/local/lib/python3.10/site-packages/numpy/lib/nanfunctions.py", line 1052, in nanmean warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
@zoetsekas Thanks for filing the issue. These are both warnings that are due to some default names getting overwritten and initial metrics being NaN. These can be safely ignored.