returnn icon indicating copy to clipboard operation
returnn copied to clipboard

TF end layer independent of batch causes error in beam search

Open albertz opened this issue 5 months ago • 0 comments

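Jintao (@jiangj-dc) has a config in which the rec layer's subnetwork defines the `end` layer purely in terms of the step index `:i`, so it does not depend on the batch: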

"end": {"class": "compare", "from": ":i", "kind": "equal", "value": 3},

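Constructing the network for beam search then fails with the following exception (the `end` flag is a scalar of shape `()`, so tiling it along axis 0 for the beam hits `assert 0 <= axis < ndim`):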

EXCEPTION
Traceback (most recent call last):
  File "/nas/models/asr/jiangj/debug/step/returnn/rnn.py", line 11, in <module>
    line: main()
    locals:
      main = <local> <function main at 0x7f57be2945e0>
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/__main__.py", line 737, in main
    line: execute_main_task()
    locals:
      execute_main_task = <global> <function execute_main_task at 0x7f57be2944c0>
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/__main__.py", line 607, in execute_main_task
    line: engine.init_network_from_config(config)
    locals:
      engine = <global> <returnn.tf.engine.Engine object at 0x7f57be2b4c70>
      engine.init_network_from_config = <global> <bound method Engine.init_network_from_config of <returnn.tf.engine.Engine object at 0x7f57be2b4c70>>
      config = <global> <returnn.config.Config object at 0x7f57be39be80>
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/engine.py", line 1243, in Engine.init_network_from_config
    line: self._init_network(net_desc=net_dict, epoch=self.epoch)
    locals:
      self = <local> <returnn.tf.engine.Engine object at 0x7f57be2b4c70>
      self._init_network = <local> <bound method Engine._init_network of <returnn.tf.engine.Engine object at 0x7f57be2b4c70>>
      net_desc = <not found>
      net_dict = <local> {'abs': {'activation': 'abs', 'class': 'activation', 'from': 'stft'}, 'conformer_block_01': {'class': 'copy', 'from': 'conformer_block_01_ln'}, 'conformer_block_01_conv_mod_bn': {'class': 'batch_norm', 'delay_sample_update': True, 'epsilon': 0.001, 'from': 'conformer_block_01_conv_mod_depthwise_c..., len = 444
      epoch = <local> 8
      self.epoch = <local> 8
...
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1728, in WhileContext._BuildLoop
    line: body_result = body(*packed_vars_for_body)
    locals:
      body_result = <not found>
      body = <local> <function _SubnetworkRecCell.get_output.<locals>.body at 0x7f5777497760>
      packed_vars_for_body = <local> (<tf.Tensor 'output/rec/while/Identity:0' shape=() dtype=int32>, ([<tf.Tensor 'output/rec/while/Identity_1:0' shape=(?,) dtype=int32>], [[<tf.Tensor 'output/rec/while/Identity_2:0' shape=(?, ?) dtype=float32>, <tf.Tensor 'output/rec/while/Identity_3:0' shape=(?, ?) dtype=int32>], [<tf.Tensor 'out...
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/layers/rec.py", line 3269, in _SubnetworkRecCell.get_output.<locals>.body
    line: self._construct(
              prev_outputs=prev_outputs,
              prev_extra=prev_extra,
              i=i,
              data=data_,
              inputs_moved_out_tas=input_layers_moved_out_tas,
              needed_outputs=needed_outputs,
          )
    locals:
      self = <local> <_SubnetworkRecCell '/output(rec-subnet)'>
      self._construct = <local> <bound method _SubnetworkRecCell._construct of <_SubnetworkRecCell '/output(rec-subnet)'>>
      prev_outputs = <local> {'output': <tf.Tensor 'output/rec/while_loop_body/prev_outputs/identity_output:0' shape=(?,) dtype=int32>}
      prev_extra = <local> {'output': {'choice_scores': <tf.Tensor 'output/rec/while_loop_body/prev_extra/identity_output_choice_scores:0' shape=(?, ?) dtype=float32>, 'choice_src_beams': <tf.Tensor 'output/rec/while_loop_body/prev_extra/identity_output_choice_src_beams:0' shape=(?, ?) dtype=int32>}, 'transformer_decoder_0..., len = 7
      i = <local> <tf.Tensor 'output/rec/while/Identity:0' shape=() dtype=int32>
      data = <not found>
      data_ = <local> {}
      inputs_moved_out_tas = <not found>
      input_layers_moved_out_tas = <local> {}
      needed_outputs = <local> {'output', 'end'}, len = 2
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/layers/rec.py", line 2244, in _SubnetworkRecCell._construct
    line: layer = get_layer(layer_name)
    locals:
      layer = <local> <CompareLayer output/'end' out_type=Tensor{[], dtype='bool', ctx=loop('dyn-time:output'[?])}>
      get_layer = <local> <function _SubnetworkRecCell._construct.<locals>.get_layer at 0x7f57774971c0>
      layer_name = <local> 'output', len = 6
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/layers/rec.py", line 2198, in _SubnetworkRecCell._construct.<locals>.get_layer
    line: layer = self.net.construct_layer(self.net_dict, name=name, get_layer=get_layer)
    locals:
      layer = <not found>
      self = <local> <_SubnetworkRecCell '/output(rec-subnet)'>
      self.net = <local> <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>
      self.net.construct_layer = <local> <bound method TFNetwork.construct_layer of <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>>
      self.net_dict = <local> {'decoder': {'class': 'layer_norm', 'from': 'transformer_decoder_06'}, 'end': {'class': 'compare', 'from': ':i', 'kind': 'equal', 'value': 3}, 'output': {'beam_size': 12, 'class': 'choice', 'from': 'output_prob', 'initial_output': 0, 'target': 'bpe_labels'}, 'output_prob': {'class': 'softmax', 'f..., len = 145
      name = <local> 'output', len = 6
      get_layer = <local> <function _SubnetworkRecCell._construct.<locals>.get_layer at 0x7f57774971c0>
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/network.py", line 1210, in TFNetwork.construct_layer
    line: return add_layer(name=name_with_prefix, layer_class=layer_class, **layer_desc)
    locals:
      add_layer = <local> <bound method TFNetwork.add_layer of <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>>
      name = <local> 'output', len = 6
      name_with_prefix = <local> 'output', len = 6
      layer_class = <local> <class 'returnn.tf.layers.rec.ChoiceLayer'>
      layer_desc = <local> {'beam_size': 12, 'initial_output': 0, 'target': ['bpe_labels'], '_network': <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>, '..., len = 9
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/network.py", line 1406, in TFNetwork.add_layer
    line: layer = self._create_layer(name=name, layer_class=layer_class, **layer_desc)
    locals:
      layer = <not found>
      self = <local> <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>
      self._create_layer = <local> <bound method TFNetwork._create_layer of <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>>
      name = <local> 'output', len = 6
      layer_class = <local> <class 'returnn.tf.layers.rec.ChoiceLayer'>
      layer_desc = <local> {'beam_size': 12, 'initial_output': 0, 'target': ['bpe_labels'], '_network': <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>, '..., len = 9
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/network.py", line 1301, in TFNetwork._create_layer
    line: layer = layer_class(**layer_desc)
    locals:
      layer = <not found>
      layer_class = <local> <class 'returnn.tf.layers.rec.ChoiceLayer'>
      layer_desc = <local> {'beam_size': 12, 'initial_output': 0, 'target': ['bpe_labels'], '_network': <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>, '..., len = 12
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/layers/rec.py", line 6153, in ChoiceLayer.__init__
    line: end_flags_flat = self.network.get_rec_step_info().get_prev_end_flag(
              target_search_choices=base_search_choices
          )  # (batch * beam_in,)
    locals:
      end_flags_flat = <not found>
      self = <local> <ChoiceLayer output/'output' out_type=Tensor{[B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}, ctx=loop('dyn-time:output'[?])}>
      self.network = <local> <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>
      self.network.get_rec_step_info = <local> <bound method TFNetwork.get_rec_step_info of <TFNetwork '/output(rec-subnet)' parent_layer=<RecLayer 'output' out_type=Tensor{[T|'dyn-time:output'[],B&Beam{'output/output'}(12)], dtype='int32', sparse_dim=Dim{F'bpe_labels:sparse-dim'(5395)}}> train=False search>>
      get_prev_end_flag = <not found>
      target_search_choices = <not found>
      base_search_choices = <local> <SearchChoices owner='prev:output' beam_size=12 beam_scores=shaped:(None,None)>
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/layers/rec.py", line 4967, in RecStepInfoLayer.get_prev_end_flag
    line: end_flag = tile_transposed(end_flag, axis=0, multiples=target_search_choices.beam_size)
    locals:
      end_flag = <local> <tf.Tensor 'output/rec/while/Identity_18:0' shape=() dtype=bool>
      tile_transposed = <local> <function tile_transposed at 0x7f578ef353f0>
      axis = <not found>
      multiples = <not found>
      target_search_choices = <local> <SearchChoices owner='prev:output' beam_size=12 beam_scores=shaped:(None,None)>
      target_search_choices.beam_size = <local> 12
  File "/nas/models/asr/jiangj/debug/step/returnn/returnn/tf/util/basic.py", line 2251, in tile_transposed
    line: assert 0 <= axis < ndim
    locals:
      axis = <local> 0
      ndim = <local> 0
AssertionError

albertz avatar Aug 26 '24 20:08 albertz