
parallel processing fails when preprocessing is Sequence or Deltafier and batch_agent_calls=True

Open · wernerolaf opened this issue 3 years ago • 1 comment

When I have code like this:

import numpy as np
from tensorforce import Environment, Runner


class PongRamEnvironment(Environment):

    def __init__(self):
        self.base_env = Environment.create(
            environment='gym', level='Pong-ram-v4', max_episode_timesteps=10000)
        super().__init__()

    def states(self):
        return {'type': 'float', 'shape': (4,), 'min_value': 0.0, 'max_value': 255.0}

    def actions(self):
        return {'type': 'int', 'shape': (), 'num_values': 2}

    def preprocess_state(self, state):
        # cpu_score = ram[13]  # computer/ai opponent score
        # player_score = ram[14]  # your score
        cpu_paddle_y = state[21]  # Y coordinate of computer paddle
        player_paddle_y = state[51]  # Y coordinate of your paddle
        ball_x = state[49]  # X coordinate of ball
        ball_y = state[54]  # Y coordinate of ball
        obs = np.array([cpu_paddle_y, player_paddle_y, ball_x, ball_y], dtype=np.float32)
        return obs

    # Optional: should only be defined if environment has a natural fixed
    # maximum episode length; restrict training timesteps via
    #     Environment.create(..., max_episode_timesteps=???)
    def max_episode_timesteps(self):
        return super().max_episode_timesteps()

    # Optional additional steps to close environment
    def close(self):
        super().close()

    def reset(self):
        state = self.base_env.reset()
        state = self.preprocess_state(state)
        return state

    def execute(self, actions):
        # map the agent's binary action {0, 1} to the gym env's actions {2, 3}
        actions = {0: 2, 1: 3}[actions]
        next_state, terminal, reward = self.base_env.execute(actions)
        next_state = self.preprocess_state(next_state)
        return next_state, terminal, reward

DQN agent specification

agent = dict(
    agent='dqn',
    # Automatically configured network
    network=dict(type='auto', size=64, depth=1),
    # Parameters
    memory=20000, batch_size=32,
    # Reward estimation
    discount=0.99, predict_terminal_values=False,
    # Preprocessing
    state_preprocessing=[
        dict(type='deltafier', concatenate=0),
        dict(type='linear_normalization')
    ],
    reward_processing=None,
    # Regularization
    l2_regularization=0.0, entropy_regularization=0.0,
    # Exploration
    exploration=0.1, variable_noise=0.0,
    # Default additional config values
    config=None,
    # Save agent every 20 updates and keep the 5 most recent checkpoints
    saver=dict(directory='model_ram', frequency=20, max_checkpoints=5),
    # Log all available Tensorboard summaries
    summarizer=dict(directory='summaries_ram', summaries='all'),
    # Do not record agent-environment interaction trace
    # recorder=dict(directory='record')
    max_episode_timesteps=10000
)

runner = Runner(agent=agent, environment=dict(environment=PongRamEnvironment), num_parallel=4, max_episode_timesteps=10000)

runner.run(num_episodes=10, batch_agent_calls=True)
runner.close()

I get the following error:


InvalidArgumentError                      Traceback (most recent call last)
Input In [139], in <cell line: 4>()
      1 runner = Runner(agent=agent, environment=dict(environment=PongRamEnvironment), num_parallel=4, max_episode_timesteps=10000)
      3 # Train for 200 episodes
----> 4 runner.run(num_episodes=10, batch_agent_calls=True)
      5 runner.close()

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/execution/runner.py:604, in Runner.run(self, num_episodes, num_timesteps, num_updates, batch_agent_calls, sync_timesteps, sync_episodes, num_sleep_secs, callback, callback_episode_frequency, callback_timestep_frequency, use_tqdm, mean_horizon, evaluation, save_best_agent, evaluation_callback)
    601     self.terminals[n] = self.prev_terminals[n]
    603 self.handle_observe_joint()
--> 604 self.handle_act_joint()
    606 # Parallel environments loop
    607 no_environment_ready = True

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/execution/runner.py:726, in Runner.handle_act_joint(self)
    724 if len(parallel) > 0:
    725     agent_start = time.time()
--> 726     self.actions = self.agent.act(
    727         states=[self.states[p] for p in parallel], parallel=parallel
    728     )
    729     agent_second = (time.time() - agent_start) / len(parallel)
    730     for p in parallel:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/agent.py:415, in Agent.act(self, states, internals, parallel, independent, deterministic, evaluation)
    410 if evaluation is not None:
    411     raise TensorforceError.deprecated(
    412         name='Agent.act', argument='evaluation', replacement='independent'
    413     )
--> 415 return super().act(
    416     states=states, internals=internals, parallel=parallel, independent=independent,
    417     deterministic=deterministic
    418 )

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/recorder.py:262, in Recorder.act(self, states, internals, parallel, independent, deterministic, **kwargs)
    260 # fn_act()
    261 if self._is_agent:
--> 262     actions, internals = self.fn_act(
    263         states=states, internals=internals, parallel=parallel, independent=independent,
    264         deterministic=deterministic, is_internals_none=is_internals_none,
    265         num_parallel=num_parallel
    266     )
    267 else:
    268     if batched:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/agents/agent.py:462, in Agent.fn_act(self, states, internals, parallel, independent, deterministic, is_internals_none, num_parallel)
    460 # Model.act()
    461 if not independent:
--> 462     actions, timesteps = self.model.act(
    463         states=states, auxiliaries=auxiliaries, parallel=parallel
    464     )
    465     self.timesteps = timesteps.numpy().item()
    467 elif len(self.internals_spec) > 0:

File ~/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/core/module.py:136, in tf_function.<locals>.decorator.<locals>.decorated(self, _initialize, *args, **kwargs)
    134 # Apply function graph
    135 with self:
--> 136     output_args = function_graphs[str(graph_params)](*graph_args)
    137 if not is_loop_body:
    138     return output_signature.args_to_kwargs(
    139         args=output_args, outer_tuple=True, from_dict=dict_interface
    140     )

File ~/anaconda3/envs/master/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152     filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153     raise e.with_traceback(filtered_tb) from None
    154 finally:
    155     del filtered_tb

File ~/anaconda3/envs/master/lib/python3.9/site-packages/tensorflow/python/eager/execute.py:54, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     52 try:
     53     ctx.ensure_initialized()
---> 54     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     55                                         inputs, attrs, num_outputs)
     56 except core._NotOkStatusException as e:
     57     if name is not None:

InvalidArgumentError: Graph execution error:

wernerolaf commented on Jul 19, 2022

OK, it seems it is not supported:

(0) INVALID_ARGUMENT: assertion failed: [Deltafier preprocessor currently not compatible with batched Agent.act.] [Condition x <= y did not hold element-wise:] [x (deltafier0/strided_slice:0) = ] [4] [y (deltafier0/assert_less_equal/y:0) = ] [1]

wernerolaf commented on Jul 19, 2022
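
A possible workaround, as a sketch rather than anything from this thread: since the assertion is raised by the deltafier state preprocessor inside the agent, the delta can instead be computed inside the custom environment, which lets state_preprocessing drop the deltafier while keeping batch_agent_calls=True (the simpler alternative is to call runner.run(num_episodes=10) without batch_agent_calls=True). The class below is hypothetical and only approximates deltafier(concatenate=0) by concatenating each observation with its frame-to-frame difference:

import numpy as np

# Hypothetical subclass of the PongRamEnvironment defined above: it tracks the
# previous observation itself and returns observation and delta concatenated,
# so the agent no longer needs the 'deltafier' state preprocessor.
class PongRamDeltaEnvironment(PongRamEnvironment):

    def states(self):
        # four raw RAM features plus their deltas; the deltas can be negative
        return {'type': 'float', 'shape': (8,), 'min_value': -255.0, 'max_value': 255.0}

    def reset(self):
        obs = super().reset()
        self.prev_obs = obs
        # first timestep: the delta half is all zeros
        return np.concatenate([obs, np.zeros_like(obs)])

    def execute(self, actions):
        next_obs, terminal, reward = super().execute(actions)
        delta = next_obs - self.prev_obs
        self.prev_obs = next_obs
        return np.concatenate([next_obs, delta]), terminal, reward

With such an environment, state_preprocessing in the agent dict above would reduce to [dict(type='linear_normalization')]; the rest of the setup should work unchanged.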