lstm_han
Can it work with the TensorFlow backend?
First, thanks for this wonderful repo. I am trying to adapt your hatt-archive-cntk.ipynb
to the TensorFlow backend, but it raises the following error when constructing the model. I'm wondering if you have any insight:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1566 try:
-> 1567 c_op = c_api.TF_FinishOperation(op_desc)
1568 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimension size must be evenly divisible by 3000 but is 200 for 'time_distributed_1/Reshape_1' (op: 'Reshape') with input shapes: [200], [3] and with input tensors computed as partial shapes: input[1] = [?,15,200].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-24-38f8f519e000> in <module>()
10
11 review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
---> 12 review_encoder = TimeDistributed(sentEncoder)(review_input)
13 l_lstm_sent = Bidirectional(GRU(GRU_UNITS, return_sequences=True, kernel_regularizer=l2_reg,
14 implementation=GPU_IMPL))(review_encoder)
/usr/local/lib/python3.5/dist-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
458 # Actually call the layer,
459 # collecting output(s), mask(s), and shape(s).
--> 460 output = self.call(inputs, **kwargs)
461 output_mask = self.compute_mask(inputs, previous_mask)
462
/usr/local/lib/python3.5/dist-packages/keras/layers/wrappers.py in call(self, inputs, training, mask)
253 output_shape = self._get_shape_tuple(
254 (-1, input_length), y, 1, output_shape[2:])
--> 255 y = K.reshape(y, output_shape)
256
257 # Apply activity regularizer if any:
/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py in reshape(x, shape)
1935 """
1936 print('before x={}, shape={}'.format(x, shape))
-> 1937 _x = tf.reshape(x, shape)
1938 print('after _x={}'.format(_x))
1939 return _x
/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
6111 if _ctx is None or not _ctx._eager_context.is_eager:
6112 _, _, _op = _op_def_lib._apply_op_helper(
-> 6113 "Reshape", tensor=tensor, shape=shape, name=name)
6114 _result = _op.outputs[:]
6115 _inputs_flat = _op.inputs
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
3390 input_types=input_types,
3391 original_op=self._default_original_op,
-> 3392 op_def=op_def)
3393
3394 # Note: shapes are lazily computed with the C API enabled.
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1732 op_def, inputs, node_def.attr)
1733 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1734 control_input_ops)
1735 else:
1736 self._c_op = None
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1568 except errors.InvalidArgumentError as e:
1569 # Convert to ValueError for backwards compatibility.
-> 1570 raise ValueError(str(e))
1571
1572 return c_op
ValueError: Dimension size must be evenly divisible by 3000 but is 200 for 'time_distributed_1/Reshape_1' (op: 'Reshape') with input shapes: [200], [3] and with input tensors computed as partial shapes: input[1] = [?,15,200].
The dot product in the attention layer seems to be what is failing. The 3000 in the error is MAX_SENTS × 200 (15 × 200): TimeDistributed expects sentEncoder to return a (batch, 200) tensor that it can reshape back to (batch, 15, 200), but the output is being inferred as a rank-1 [200] tensor, most likely because K.dot between a 3D tensor and a 1D kernel behaves differently under the TensorFlow backend. Try replacing the K.dot calls in AttLayer.call with the following helper:
from keras import backend as K

def dot_product(x, kernel):
    if len(kernel.shape) == 2:
        # 2D kernel: K.dot already keeps the batch axis; the expand/squeeze
        # pair leaves the result unchanged.
        return K.squeeze(K.expand_dims(K.dot(x, kernel)), axis=-1)
    else:
        # 1D kernel: expand it to (dim, 1) so the TensorFlow matmul keeps the
        # batch axis, then squeeze the trailing singleton axis away.
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
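For context, here is a minimal sketch of how the helper might slot into the attention layer, assuming the usual HAN-style AttLayer with a learned context vector. The weight names (W, b, u), the attention_dim parameter, and the softmax normalization are assumptions on my part, so adapt them to the actual layer in the notebook (masking is omitted for brevity):

from keras import backend as K
from keras.layers import Layer

class AttLayer(Layer):
    """HAN-style attention pooling over the time axis (sketch)."""
    def __init__(self, attention_dim=100, **kwargs):
        self.attention_dim = attention_dim
        super(AttLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: (batch, timesteps, features)
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.attention_dim),
                                 initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(name='b', shape=(self.attention_dim,),
                                 initializer='zeros', trainable=True)
        self.u = self.add_weight(name='u', shape=(self.attention_dim,),
                                 initializer='glorot_uniform', trainable=True)
        super(AttLayer, self).build(input_shape)

    def call(self, x, mask=None):
        # uit: (batch, timesteps, attention_dim) -- 2D kernel, plain dot is fine
        uit = K.tanh(dot_product(x, self.W) + self.b)
        # ait: (batch, timesteps) -- 1D kernel, the call that broke under TF
        ait = dot_product(uit, self.u)
        a = K.softmax(ait)
        # Weighted sum over time: (batch, features)
        return K.sum(x * K.expand_dims(a), axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

The second dot_product call is the one with a 1D kernel, which is where the TensorFlow backend previously collapsed the batch dimension and produced the rank-1 [200] tensor in the traceback.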