Hi, I developed an environment with the following action_spec:
BoundedTensorSpec(shape=(2,), dtype=tf.int32, name='action', minimum=array(0, dtype=int32), maximum=array(65535, dtype=int32))
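For reference, the spec above is essentially what a declaration like this produces (a minimal sketch; the exact construction in my environment differs):

import tensorflow as tf
from tf_agents.specs import tensor_spec

# Two independent discrete actions, each taking a value in [0, 65535].
action_spec = tensor_spec.BoundedTensorSpec(
    shape=(2,), dtype=tf.int32, name='action', minimum=0, maximum=65535)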
Since the two action dimensions are independent, I use tfp.distributions.Independent to build the action distribution:
self._output_dist_spec = distribution_spec.DistributionSpec(
    tfp.distributions.Independent,
    input_param_spec,
    sample_spec=output_tensors_spec,
    reinterpreted_batch_ndims=1)
output_dist = self._output_dist_spec.build_distribution(
    distribution=tfp.distributions.Categorical(
        logits=tf.concat([masked_logits, masked_logits], axis=-2)))
where masked_logits has shape (1, 65536).
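In isolation (with a dummy zero tensor standing in for the real masked logits), this construction behaves as intended: concatenating two (1, 65536) logits along axis -2 gives a Categorical with batch shape (2,), and Independent reinterprets that batch dimension as the event, so samples have the expected shape (2,):

import tensorflow as tf
import tensorflow_probability as tfp

masked_logits = tf.zeros((1, 65536))  # dummy stand-in for the real masked logits
dist = tfp.distributions.Independent(
    tfp.distributions.Categorical(
        logits=tf.concat([masked_logits, masked_logits], axis=-2)),
    reinterpreted_batch_ndims=1)
print(dist.event_shape)  # (2,)
print(dist.sample())     # int32 tensor of shape (2,)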
However, when saving the policy I ran into the following error:
Traceback (most recent call last):
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/mas/zhengyu/workspace/urban_training/urban_training/learning/train_ppo.py", line 126, in
multiprocessing.handle_main(functools.partial(app.run, main))
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/system/default/multiprocessing_core.py", line 77, in handle_main
return app.run(parent_main_fn, *args, **kwargs)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/absl/app.py", line 312, in run
_run_main(main, args)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/absl/app.py", line 258, in _run_main
sys.exit(main(argv))
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/absl/app.py", line 312, in run
_run_main(main, args)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/absl/app.py", line 258, in _run_main
sys.exit(main(argv))
File "/home/mas/zhengyu/workspace/urban_training/urban_training/learning/train_ppo.py", line 117, in main
allow_variable_length_episodes=_ALLOW_VARIABLE_LENGTH_EPISODES.value)
File "/home/mas/zhengyu/workspace/urban_training/urban_training/learning/train_ppo_lib.py", line 107, in train
interval=num_episodes_per_iteration)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/train/triggers.py", line 133, in init
use_nest_path_signatures)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/train/triggers.py", line 174, in _build_saver
input_fn_and_spec=self._input_fn_and_spec,
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/policies/policy_saver.py", line 385, in init
policy_state=batched_policy_state_spec)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 1239, in get_concrete_function
concrete = self._get_concrete_function_garbage_collected(*args, **kwargs)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 1219, in _get_concrete_function_garbage_collected
self._initialize(args, kwargs, add_initializers_to=initializers)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 786, in _initialize
*args, **kwds))
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2520, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2751, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2676, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py", line 1141, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 677, in wrapped_fn
out = weak_wrapped_fn().wrapped(*args, **kwds)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/policies/policy_saver.py", line 381, in polymorphic_action_fn
return action_fn(time_step, policy_state)
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/policies/tf_policy.py", line 346, in action
message='action output and policy_step_spec structures do not match')
File "/usr/local/anaconda/envs/tf-agents/lib/python3.7/site-packages/tf_agents/utils/nest_utils.py", line 125, in assert_same_structure
.format(message, str1, str2, nest1, nest2))
ValueError: action output and policy_step_spec structures do not match:
PolicyStep(action=., state=(), info={'dist_params': {}, 'value_prediction': .})
vs.
PolicyStep(action=., state=(), info=DictWrapper({'dist_params': DictWrapper({'logits': .}), 'value_prediction': .}))
Values:
PolicyStep(action=<tf.Tensor 'IndependentCategorical_CONSTRUCTED_AT_RLPolicyModel/sample/Reshape:0' shape=(None,) dtype=int32>, state=(), info={'dist_params': {}, 'value_prediction': <tf.Tensor 'rl_value_model/Squeeze:0' shape=(None,) dtype=float32>})
vs.
PolicyStep(action=BoundedTensorSpec(shape=(2,), dtype=tf.int32, name='action', minimum=array(0, dtype=int32), maximum=array(65535, dtype=int32)), state=(), info=DictWrapper({'dist_params': DictWrapper({'logits': TensorSpec(shape=(65536,), dtype=tf.float32, name='RLPolicyModel_logits')}), 'value_prediction': TensorSpec(shape=(), dtype=tf.float32, name=None)}))
It seems the error comes from the following lines in tf_policy.py:
if self._validate_args:
  nest_utils.assert_same_structure(
      step,
      self._policy_step_spec,
      message='action output and policy_step_spec structures do not match')
which is invoked by the PolicySaver created in PolicySavedModelTrigger.
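The structure check itself is easy to reproduce outside tf-agents; with placeholder values, an empty dist_params on one side and a populated one on the other triggers the same kind of ValueError:

import tensorflow as tf

actual_info = {'dist_params': {}, 'value_prediction': tf.constant(0.0)}
spec_info = {
    'dist_params': {'logits': tf.TensorSpec(shape=(65536,), dtype=tf.float32)},
    'value_prediction': tf.TensorSpec(shape=(), dtype=tf.float32)}
# Raises ValueError: dist_params is empty on one side but contains
# 'logits' on the other, so the two nests differ in structure.
tf.nest.assert_same_structure(actual_info, spec_info)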
Is it a bug?
The specs don't match
PolicyStep(action=., state=(), info={'dist_params': {}, 'value_prediction': .})
vs.
PolicyStep(action=., state=(), info=DictWrapper({'dist_params': DictWrapper({'logits': .}), 'value_prediction': .}))
Make sure the policy builds the correct info data.
@sguada Does the PPOAgent in tf-agents only support 1-D actions?