parallel processing fails when preprocessing is Sequence or Deltafier and batch_agent_calls=True
When I have code like this:
import numpy as np
from tensorforce import Environment

class PongRamEnvironment(Environment):

    def __init__(self):
        self.base_env = Environment.create(
            environment='gym', level='Pong-ram-v4', max_episode_timesteps=10000)
        super().__init__()

    def states(self):
        return {'type': 'float', 'shape': (4,), 'min_value': 0.0, 'max_value': 255.0}

    def actions(self):
        return {'type': 'int', 'shape': (), 'num_values': 2}

    def preprocess_state(self, state):
        # cpu_score = ram[13]     # computer/AI opponent score
        # player_score = ram[14]  # your score
        cpu_paddle_y = state[21]     # Y coordinate of computer paddle
        player_paddle_y = state[51]  # Y coordinate of your paddle
        ball_x = state[49]           # X coordinate of ball
        ball_y = state[54]           # Y coordinate of ball
        obs = np.array([cpu_paddle_y, player_paddle_y, ball_x, ball_y], dtype=np.float32)
        return obs

    # Optional: should only be defined if environment has a natural fixed
    # maximum episode length; restrict training timesteps via
    # Environment.create(..., max_episode_timesteps=???)
    def max_episode_timesteps(self):
        return super().max_episode_timesteps()

    # Optional additional steps to close environment
    def close(self):
        super().close()

    def reset(self):
        state = self.base_env.reset()
        state = self.preprocess_state(state)
        return state

    def execute(self, actions):
        # Map agent actions {0, 1} to gym actions {2 (up), 3 (down)}
        actions = {0: 2, 1: 3}[actions]
        next_state, terminal, reward = self.base_env.execute(actions)
        next_state = self.preprocess_state(next_state)
        return next_state, terminal, reward
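The environment itself seems fine in isolation; a quick standalone sanity check (my own sketch, assuming gym with Pong-ram-v4 is installed) passes:

env = Environment.create(environment=PongRamEnvironment, max_episode_timesteps=10000)
state = env.reset()
assert state.shape == (4,) and state.dtype == np.float32
next_state, terminal, reward = env.execute(actions=0)  # action 0 maps to gym action 2 (up)
env.close()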
DQN agent specification:

agent = dict(
    agent='dqn',
    # Automatically configured network
    network=dict(type='auto', size=64, depth=1),
    # Parameters
    memory=20000,
    batch_size=32,
    # Reward estimation
    discount=0.99,
    predict_terminal_values=False,
    # Preprocessing
    state_preprocessing=[
        dict(type='deltafier', concatenate=0),
        dict(type='linear_normalization')
    ],
    reward_processing=None,
    # Regularization
    l2_regularization=0.0,
    entropy_regularization=0.0,
    # Exploration
    exploration=0.1,
    variable_noise=0.0,
    # Default additional config values
    config=None,
    # Save agent every 20 updates and keep the 5 most recent checkpoints
    saver=dict(directory='model_ram', frequency=20, max_checkpoints=5),
    # Log all available TensorBoard summaries
    summarizer=dict(directory='summaries_ram', summaries='all'),
    # Do not record agent-environment interaction trace
    # recorder=dict(directory='record')
    max_episode_timesteps=10000
)
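For context, my understanding of dict(type='deltafier', concatenate=0) is that it appends the change between consecutive observations to the current one along axis 0, so the network input becomes shape (8,). Roughly, in NumPy terms (my own sketch, not the library's code):

def deltafier_concat0(state, prev_state):
    # Rough NumPy equivalent of dict(type='deltafier', concatenate=0):
    # concatenate the per-feature change onto the current observation,
    # turning the (4,) Pong-ram features into an (8,) network input
    delta = state - prev_state
    return np.concatenate([state, delta], axis=0)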
from tensorforce.execution import Runner

runner = Runner(
    agent=agent, environment=dict(environment=PongRamEnvironment),
    num_parallel=4, max_episode_timesteps=10000
)
runner.run(num_episodes=10, batch_agent_calls=True)
runner.close()
I get the following error:
InvalidArgumentError                      Traceback (most recent call last)
Input In [139], in <cell line: 4>()
      1 runner = Runner(agent=agent, environment=dict(environment=PongRamEnvironment), num_parallel=4, max_episode_timesteps=10000)
      3 # Train for 200 episodes
----> 4 runner.run(num_episodes=10,batch_agent_calls=True)
      5 runner.close()

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/execution/runner.py:604, in Runner.run(self, num_episodes, num_timesteps, num_updates, batch_agent_calls, sync_timesteps, sync_episodes, num_sleep_secs, callback, callback_episode_frequency, callback_timestep_frequency, use_tqdm, mean_horizon, evaluation, save_best_agent, evaluation_callback)
    601 self.terminals[n] = self.prev_terminals[n]
    603 self.handle_observe_joint()
--> 604 self.handle_act_joint()
    606 # Parallel environments loop
    607 no_environment_ready = True

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/execution/runner.py:726, in Runner.handle_act_joint(self)
    724 if len(parallel) > 0:
    725     agent_start = time.time()
--> 726 self.actions = self.agent.act(
    727     states=[self.states[p] for p in parallel], parallel=parallel
    728 )
    729 agent_second = (time.time() - agent_start) / len(parallel)
    730 for p in parallel:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/agent.py:415, in Agent.act(self, states, internals, parallel, independent, deterministic, evaluation)
    410 if evaluation is not None:
    411     raise TensorforceError.deprecated(
    412         name='Agent.act', argument='evaluation', replacement='independent'
    413     )
--> 415 return super().act(
    416     states=states, internals=internals, parallel=parallel, independent=independent,
    417     deterministic=deterministic
    418 )

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/recorder.py:262, in Recorder.act(self, states, internals, parallel, independent, deterministic, **kwargs)
    260 # fn_act()
    261 if self._is_agent:
--> 262     actions, internals = self.fn_act(
    263         states=states, internals=internals, parallel=parallel, independent=independent,
    264         deterministic=deterministic, is_internals_none=is_internals_none,
    265         num_parallel=num_parallel
    266     )
    267 else:
    268     if batched:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/agent.py:462, in Agent.fn_act(self, states, internals, parallel, independent, deterministic, is_internals_none, num_parallel)
    460 # Model.act()
    461 if not independent:
--> 462     actions, timesteps = self.model.act(
    463         states=states, auxiliaries=auxiliaries, parallel=parallel
    464     )
    465     self.timesteps = timesteps.numpy().item()
    467 elif len(self.internals_spec) > 0:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/core/module.py:136, in tf_function.

File ~/anaconda3/envs/master/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.

File ~/anaconda3/envs/master/lib/python3.9/site-packages/tensorflow/python/eager/execute.py:54, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     52 try:
     53     ctx.ensure_initialized()
---> 54     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     55         inputs, attrs, num_outputs)
     56 except core._NotOkStatusException as e:
     57     if name is not None:
InvalidArgumentError: Graph execution error:

(0) INVALID_ARGUMENT: assertion failed: [Deltafier preprocessor currently not compatible with batched Agent.act.] [Condition x <= y did not hold element-wise:] [x (deltafier0/strided_slice:0) = ] [4] [y (deltafier0/assert_less_equal/y:0) = ] [1]

OK, it seems this is not supported.
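Given the assertion message, the simplest workaround is to run with runner.run(num_episodes=10, batch_agent_calls=False). If batched calls matter for throughput, another option is to compute the delta inside the environment instead of via the 'deltafier' preprocessor. A rough sketch (DeltaPongRamEnvironment is my own name; this replaces the state with its delta rather than concatenating it, so to mirror concatenate=0 you would return np.concatenate([next_state, delta]) and change states() to shape (8,); min_value/max_value would also need widening, since deltas can be negative):

class DeltaPongRamEnvironment(PongRamEnvironment):
    # Workaround sketch: emit frame-to-frame deltas directly, so the agent
    # no longer needs dict(type='deltafier', ...) in state_preprocessing
    # and batch_agent_calls=True keeps working.

    def reset(self):
        state = super().reset()
        self.prev_state = state
        # No previous observation yet, so emit a zero delta
        return np.zeros_like(state)

    def execute(self, actions):
        next_state, terminal, reward = super().execute(actions)
        delta = next_state - self.prev_state
        self.prev_state = next_state
        return delta, terminal, reward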