
Received a mix of batched and unbatched Tensors, or Tensors are not compatible with Specs.

Open TAHTAH98 opened this issue 3 years ago • 8 comments

Hi!

I know this issue has been raised many times, but none of the proposed solutions worked for me, and sometimes the threads never got to the bottom of it. Also, the complete code often wasn't posted, so I'd like to ask for help once again.

import numpy as np

from tf_agents.environments import py_environment
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as ts


class CustomEnv(py_environment.PyEnvironment):
    
    def __init__(self):
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=3)
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(6,), dtype=np.int32, minimum=0, maximum=1)
        self._state = [0,0,0,0,0,0]
        self._counter = 0
        self._episode_ended = False
        self.dictionary = {0: [(0,0), (0,1)], 
                            1: [(0,2)],
                            2: [(1,0), (1,1)],
                            3: [(1,2), (2,0), (2,1), (2,2)]}
    
    def action_spec(self):
        return self._action_spec
    
    def observation_spec(self):
        return self._observation_spec
    
    def _reset(self):
        self._state = [0,0,0,0,0,0]
        self._counter = 0
        self._episode_ended = False
        return ts.restart(np.array([self._state], dtype=np.int32))
    
    def preferences(self):
        return np.random.randint(3, size=2)
    
    def pickedGift(self, yes):
        reward = -1.0
        if yes:
            reward = 0.0
        return reward
    
    def _step(self, action):
        if self._episode_ended:
            self._reset()
        
        if self._counter<250:
            self._counter += 1
            
            color, letter = self.preferences()
            condition = (color, letter) in self.dictionary[int(action)]
            reward = self.pickedGift(condition)
            self._state[color] = 1
            self._state[3+letter] = 1
            
            if self._counter==250:
                self._episode_ended=True
                return ts.termination(np.array([self._state], 
                                               dtype=np.int32),
                                      reward,
                                      1)
            else:
                return ts.transition(np.array([self._state], 
                                              dtype=np.int32), 
                                     reward, 
                                     discount=1.0)

The code that I run gives me the following error:

ValueError: Received a mix of batched and unbatched Tensors, or Tensors are not compatible with Specs.  num_outer_dims: 1.
Saw tensor_shapes:
   TimeStep(
{'discount': TensorShape([1]),
 'observation': TensorShape([1, 1, 6]),
 'reward': TensorShape([1]),
 'step_type': TensorShape([1])})
And spec_shapes:
   TimeStep(
{'discount': TensorShape([]),
 'observation': TensorShape([6]),
 'reward': TensorShape([]),
 'step_type': TensorShape([])})

The code is the following:

py_env = CustomEnv()
tf_env = tf_py_environment.TFPyEnvironment(py_env)
    
fc_layer_params = [16, 16]

q_net = QNetwork(
        tf_env.observation_spec(),
        tf_env.action_spec(),
        fc_layer_params=fc_layer_params)

train_step = tf.Variable(0)
update_period = 1
optimizer = keras.optimizers.RMSprop(lr=2.5e-4,
                                     rho=0.95,
                                     momentum=0.0,
                                     epsilon=0.00001,
                                     centered=True)

epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=1.0,
        decay_steps=25000 // update_period,
        end_learning_rate=0.01)

agent = DqnAgent(tf_env.time_step_spec(),
                 tf_env.action_spec(),
                 q_network=q_net,
                 optimizer=optimizer,
                 target_update_period=25,
                 td_errors_loss_fn=keras.losses.Huber(reduction="none"),
                 gamma=0.99,
                 train_step_counter=train_step,
                 epsilon_greedy=lambda: epsilon_fn(train_step))

agent.initialize()

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        data_spec=agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=25000)
    
replay_buffer_observer = replay_buffer.add_batch
    
train_metrics = [
        tf_metrics.NumberOfEpisodes(),
        tf_metrics.EnvironmentSteps(),
        tf_metrics.AverageReturnMetric(),
        tf_metrics.AverageEpisodeLengthMetric()]
    
    
    
collect_driver = DynamicStepDriver(
        tf_env,
        agent.collect_policy,
        observers=[replay_buffer_observer] + train_metrics,
        num_steps=update_period)
    
initial_collect_policy = RandomTFPolicy(
        tf_env.time_step_spec(),
        tf_env.action_spec())
    
init_driver = DynamicStepDriver(
        tf_env,
        initial_collect_policy,
        observers=[replay_buffer_observer, ShowProgress(250)],
        num_steps=250)
    
final_time_step, final_policy_state = init_driver.run()

dataset = replay_buffer.as_dataset(
        sample_batch_size=16,
        num_steps=2,
        num_parallel_calls=4).prefetch(4)
    
collect_driver.run = function(collect_driver.run)
agent.train = function(agent.train)
    
def train_agent(n_iterations):
    time_step = None
    policy_state = agent.collect_policy.get_initial_state(tf_env.batch_size)
    iterator = iter(dataset)
    for iteration in range(n_iterations):
        time_step, policy_state = collect_driver.run(time_step, policy_state)
        trajectories, buffer_info = next(iterator)
        train_loss = agent.train(trajectories)
        print("\r{} loss:{:.5f}".format(
            iteration, train_loss.loss.numpy()), end="")
        if iteration % 1000 == 0:
            log_metrics(train_metrics)
                
train_agent(25000)

I tried to figure out where the problem comes from and noticed the following. When I run

tf_env.time_step_spec()

I get the following:

TimeStep(
{'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
 'observation': BoundedTensorSpec(shape=(6,), dtype=tf.int32, name=None, minimum=array(0), maximum=array(1)),
 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')})

But when I run the step method, for instance:

tf_env.step(0)

I get:

TimeStep(
{'discount': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>,
 'observation': <tf.Tensor: shape=(1, 1, 6), dtype=int32, numpy=array([[[0, 1, 0, 0, 0, 1]]])>,
 'reward': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-1.], dtype=float32)>,
 'step_type': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([1])>})
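
If I read these shapes correctly, part of the extra nesting might be my own doing: the observation spec is (6,), but I return np.array([self._state], dtype=np.int32), which already has shape (1, 6), and the TFPyEnvironment then prepends its own batch dimension on top, giving (1, 1, 6). A minimal sketch of what I could change (untested):

def _reset(self):
    self._state = [0, 0, 0, 0, 0, 0]
    self._counter = 0
    self._episode_ended = False
    # Return the state with shape (6,) to match the observation spec;
    # the TFPyEnvironment wrapper adds the batch dimension itself.
    return ts.restart(np.array(self._state, dtype=np.int32))

The same change would apply to the ts.transition and ts.termination calls in _step.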

I hope someone can help me; I really tried the solutions proposed in the other threads on this same issue, but nothing worked. I use Python 3.8.5, TensorFlow 2.4.1, TF-Agents 0.8.0, and NumPy 1.20.1.

Thank you in advance.

TAHTAH98 avatar Jun 30 '21 14:06 TAHTAH98

I have a similar error and think I can trace the problem back to the BatchedPyEnvironment created when you wrap a PyEnvironment with TFPyEnvironment.

From my understanding, BatchedPyEnvironment runs multiple environments and concatenates their results together to form an output. Concatenation adds a batch dimension to each array. However, the array spec of BatchedPyEnvironment does not reflect this change; it just uses the spec from a single environment, which lacks the batch dimension.
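
To illustrate what I mean, a minimal sketch (using two copies of a stock suite_gym environment; the shapes in the comments are what I would expect, not verified output):

from tf_agents.environments import batched_py_environment, suite_gym

# Batch two copies of the same environment together.
envs = [suite_gym.load('CartPole-v0') for _ in range(2)]
batched_env = batched_py_environment.BatchedPyEnvironment(envs)

# The spec comes from a single environment, with no batch dimension ...
print(batched_env.observation_spec().shape)   # (4,)
# ... while the actual outputs are stacked across the environments.
print(batched_env.reset().observation.shape)  # (2, 4)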

The bad spec leads to the policy receiving the wrong spec from the environment, which results in the policy passing the wrong spec to the Replay Buffer and the "mix of batched and unbatched" error we see here.

Does this sound correct? I'll be glad to issue a PR, but want confirmation that BatchedPyEnvironment is not performing correctly first.

WardLT avatar Jul 21 '21 14:07 WardLT

As I dig into the source code more, I see there are actually test cases enforcing that the BatchedPyEnvironment specs disagree with the actual shape of the output arrays (see here).

The disagreement between spec and shape of the output array seems undesirable, but I'm awfully new to TF-Agents. Could someone clarify?

WardLT avatar Jul 21 '21 15:07 WardLT

I'm getting the same error when trying to replicate the compute_avg_return(eval_env, random_policy, num_eval_episodes) line from the official DQN tutorial for my custom environment.

It looks like the error is caused by env.reset() adding one more dimension to the observation shape. Here are my exact steps:

  1. Define the environments and random policy:
train_py_env = MyEnv()
eval_py_env = MyEnv()
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
random_policy = random_tf_policy.RandomTFPolicy(train_env.time_step_spec(),
                                                train_env.action_spec())

At this point, both environments and the policy all have the same observation shape:

eval_env.time_step_spec().observation.shape, train_env.time_step_spec().observation.shape, random_policy.time_step_spec.observation.shape
(TensorShape([5, 5, 3]), TensorShape([5, 5, 3]), TensorShape([5, 5, 3]))

  2. Reset the environment:
time_step = eval_env.reset()

At this point, time_step now holds a different observation shape, with one more dimension added:

time_step.observation.shape
TensorShape([1, 3, 5, 5])

  3. Get an action step from the random policy:
action_step = random_policy.action(time_step)

This produces an error:

ValueError: Received a mix of batched and unbatched Tensors, or Tensors are not compatible with Specs. num_outer_dims: 1.
Saw tensor_shapes:
   TimeStep(
{'discount': TensorShape([1]),
 'observation': TensorShape([1, 3, 5, 2]),
 'reward': TensorShape([1]),
 'step_type': TensorShape([1])})
And spec_shapes:
   TimeStep(
{'discount': TensorShape([]),
 'observation': TensorShape([5, 5, 3]),
 'reward': TensorShape([]),
 'step_type': TensorShape([])})

Any idea what I might be doing wrong here?

ayarmak avatar Oct 11 '21 22:10 ayarmak

As you can see in the error message, the shape of the observation has changed from [5, 5, 3] to [3, 5, 2], so I suppose there is an error in your environment.

'observation': TensorShape([1, 3, 5, 2]),
vs
'observation': TensorShape([5, 5, 3]),

sguada avatar Oct 11 '21 22:10 sguada

@sguada You're exactly right; it looks like it was indeed an issue with my environment. First, the observation specs were defined as (5, 5, 3) instead of (3, 5, 5). Second, there was an edge case in the environment logic due to which the last dimension sometimes wasn't being fully populated.

Thanks a lot for helping out!
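
In case it helps anyone who lands here: a quick way to catch this kind of spec/observation mismatch before wrapping the environment is tf_agents.environments.utils.validate_py_environment, which steps through a few episodes and raises if anything disagrees with the declared specs. A minimal sketch (MyEnv being the custom environment from my snippet above):

from tf_agents.environments import utils

env = MyEnv()
# Raises a ValueError if observations, rewards, etc. don't match the specs.
utils.validate_py_environment(env, episodes=5)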

ayarmak avatar Oct 12 '21 00:10 ayarmak

I seem to have the same error, and I can't see an obvious error in my custom environment.

I have a custom gym environment wrapped in a PyEnvironment, wrapped in a TFPyEnvironment. Upon resetting, the TFPyEnvironment seems to change the shape of the observation.

Here is the observation_space spec of the gym env:

space = {'observation': gym.spaces.Box(np.float32(0), np.float32(3), shape=(size*size*3,)),
         'legal_moves': gym.spaces.Discrete(gogame.action_size(self.state_)-1)}

self.observation_space = gym.spaces.Dict(space)

Here is the return of the reset() method:

observations_and_legal_moves = {'observation': np.copy(self.state_)[:3].flatten(),
                                'legal_moves': 1 - self.state_[govars.INVD_CHNL].flatten()}
return observations_and_legal_moves

And of the step() method:

observations_and_legal_moves = {'observation': np.copy(self.state_)[:3].flatten(),
                                'legal_moves': 1 - self.state_[govars.INVD_CHNL].flatten()}

return observations_and_legal_moves, self.reward(), self.done, self.info()

I wrap the gym env in a PyEnvironment: tp_env = suite_gym.load('gym_go:go-v1', gym_kwargs={'size': 3, 'komi': 0}). Then I call reset, tp_env.reset(), with the output:

TimeStep(
{'discount': array(1., dtype=float32),
 'observation': OrderedDict([('legal_moves',
                              array([1, 1, 1, 1, 1, 1, 1, 1, 1])),
                             ('observation',
                              array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))]),
 'reward': array(0., dtype=float32),
 'step_type': array(0, dtype=int32)})

This is the correct shape: (9,) or () for legal_moves, and (27,) for observation. Then I wrap it in a TFPyEnvironment: t_env = tf_py_environment.TFPyEnvironment(suite_gym.load('gym_go:go-v1', gym_kwargs={'size': 3, 'komi': 0})), and call reset(), t_env.reset(), with the output:

TimeStep(
{'discount': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>,
 'observation': OrderedDict([('legal_moves',
                              <tf.Tensor: shape=(1, 9), dtype=int64, numpy=array([[1, 1, 1, 1, 1, 1, 1, 1, 1]])>),
                             ('observation',
                              <tf.Tensor: shape=(1, 27), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>)]),
 'reward': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>,
 'step_type': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([0], dtype=int32)>})

I've looked through the source code for TFPyEnvironment but can't seem to figure out what it is that is changing the shape. Why is this happening?

I'm not sure, but I believe the outer dim being added is the batch dimension? I don't know how to correct my class to comply, though.
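
A quick check that should tell whether the leading 1 is just the batch dimension added by the TF wrapper (a sketch, reusing tp_env and t_env from above; the shapes in the comments are taken from the outputs I posted):

# The plain py environment returns unbatched arrays ...
print(tp_env.reset().observation['observation'].shape)  # (27,)
# ... while the TF wrapper prepends a batch dimension of size batch_size.
print(t_env.batch_size)                                 # 1
print(t_env.reset().observation['observation'].shape)   # (1, 27)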

scrungus avatar Feb 17 '22 18:02 scrungus

The added batch dimension from my previous comment doesn't appear to be the problem, as it is also added when the observation_space is just a gym.spaces.Box as opposed to a dict.

When self.observation_space = gym.spaces.Box(np.float32(0), np.float32(3), shape=(size*size*3,)) in the custom gym environment, the TFPyEnvironment reset returns an observation of shape (1, 27), and it works fine with RandomTFPolicy and the PyDriver. However, when it's a dict and an observation splitter is provided:

space = {'observation': gym.spaces.Box(np.float32(0), np.float32(3), shape=(size*size*3,)),
         'legal_moves': gym.spaces.Discrete(gogame.action_size(self.state_)-1)}

self.observation_space = gym.spaces.Dict(space)

...

def observation_and_action_constraint_splitter_func(obs):
    return obs['observation'],obs['legal_moves']

random_policy = random_tf_policy.RandomTFPolicy(t_env.time_step_spec(),
                                                t_env.action_spec(),
                                                observation_and_action_constraint_splitter=observation_and_action_constraint_splitter_func
                                               )

py_driver.PyDriver(
    env,
    py_tf_eager_policy.PyTFEagerPolicy(
      random_policy, use_tf_function=True),
    [rb_observer],
    max_steps=initial_collect_steps).run(tp_env.reset())

The error:

ValueError: Received a mix of batched and unbatched Tensors, or Tensors are not compatible with Specs.  num_outer_dims: 1.
Saw tensor_shapes:
   TimeStep(
{'discount': TensorShape([1]),
 'observation': {'legal_moves': TensorShape([1, 9]),
                 'observation': TensorShape([1, 27])},
 'reward': TensorShape([1]),
 'step_type': TensorShape([1])})
And spec_shapes:
   TimeStep(
{'discount': TensorShape([]),
 'observation': {'legal_moves': TensorShape([]),
                 'observation': TensorShape([27])},
 'reward': TensorShape([]),
 'step_type': TensorShape([])})

is thrown, even though the observation would appear to be the exact same shape (1, 27). This makes even less sense than before.

scrungus avatar Feb 18 '22 17:02 scrungus

@scrungus The issue seems to lie with the spec of legal_moves: the error message says that it expected a scalar (TensorShape([])) but received a tensor of length 9 (TensorShape([1, 9]); ignoring the batch dimension of 1 here).

According to the docs of, e.g., DqnAgent, the splitter should return a mask over all actions, indicating which ones are legal; however, gym.spaces.Discrete specifies a scalar instead. I had a similar case and used gym.spaces.MultiBinary, which seemed to work.

In #255 they used tf_agents.specs.ArraySpec directly to define the mask, but that doesn't work with gym.spaces.Dict, so that's why I used gym.spaces.MultiBinary; I'm wondering whether that's the correct/preferred way to define a mask inside gym.
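
For completeness, a minimal sketch of how the observation space could look with a MultiBinary mask (assuming gogame.action_size(self.state_) is the number of actions the mask should cover, as in the snippets above):

space = {'observation': gym.spaces.Box(np.float32(0), np.float32(3),
                                       shape=(size * size * 3,)),
         # One 0/1 flag per action, so the splitter yields a proper mask
         # instead of a single Discrete scalar.
         'legal_moves': gym.spaces.MultiBinary(gogame.action_size(self.state_))}
self.observation_space = gym.spaces.Dict(space)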

Dominik1123 avatar Feb 22 '22 16:02 Dominik1123