tensorflow-onnx
Why tf.signal.rfft can not convert to onnx for TensorFlow2?
Hi, I'm using tf2onnx to convert a pb model to an ONNX model with the following command:
python3 -m tf2onnx.convert --graphdef logs/model.pb --output logs/model.onnx --inputs xs:0,xlen:0 --outputs Identity:0
But the conversion fails with an error.
The original code looks like this:
```python
@tf.function
def tf_stft(signals, frame_length, frame_step, fft_length=None,
            window_fn=tf.signal.hann_window, pad_end=False):
    signals = tf.convert_to_tensor(signals)
    signals.shape.with_rank_at_least(1)
    frame_length = tf.convert_to_tensor(frame_length)
    frame_length.shape.assert_has_rank(0)
    frame_step = tf.convert_to_tensor(frame_step)
    frame_step.shape.assert_has_rank(0)
    if fft_length is None:
        # Round the FFT length up to the next power of two.
        order = tf.math.ceil(tf.math.log(frame_length * 1.0) / tf.math.log(2.))
        fft_length = tf.math.pow(2, tf.cast(order, dtype=tf.int32))
    else:
        fft_length = tf.convert_to_tensor(fft_length)
    framed_signals = tf.signal.frame(signals, fft_length,
                                     frame_step, pad_end=pad_end)
    if window_fn is not None:
        window = window_fn(frame_length, dtype=framed_signals.dtype)
        lpad = (fft_length - window.shape[-1]) // 2
        window = tf.pad(window, [[0, 0]] * (window.shape.ndims - 1) +
                        [[lpad, fft_length - window.shape[-1] - lpad]])
        framed_signals *= window
    return tf.signal.rfft(framed_signals, [fft_length])
```
I convert it to a pb model first, then try to convert the pb model to an ONNX model, and that fails. But in a TensorFlow 1.x environment, I use the same code to convert it to a saved model, then convert the saved model to an ONNX model, and everything is OK. What's wrong? Can anyone give me some help? Thanks a lot. The pb model I put here.
I tried the conversion with tf 1.15 + Python 3.7, and it also failed, with a different issue. Could you please share the environment in which you can run the conversion successfully?
Could you please add a call to tf.abs() following the tf.signal.rfft() call? This behavior is by design: tf2onnx cannot handle the pattern of a TF RFFT op followed directly by an Identity op.
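For concreteness, a minimal sketch of the suggested change (the function name here is illustrative; only the return expression matters):

```python
import tensorflow as tf

@tf.function
def stft_magnitude(framed_signals, fft_length):
    # Taking tf.abs of the complex RFFT output makes the graph end in a
    # real-valued op instead of RFFT -> Identity, which tf2onnx cannot handle.
    return tf.abs(tf.signal.rfft(framed_signals, [fft_length]))
```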
Yes, great, adding a call to tf.abs() after the tf.signal.rfft() call works. If the final return is the raw result of tf.signal.rfft(), it gives this error message even in a TensorFlow 1.x environment; following it with tf.abs() works, but tf.math.abs() does not. Thanks a lot for the help. By the way, are the tf.cond() and tf.while_loop() operators still not supported in tf2onnx? When I try to convert the full model (a Conformer model) to an ONNX model, it gives the following error, and in a TensorFlow 1.x environment it gives the same error message.
Both of them are already supported by tf2onnx. It looks like this error was raised in run_rewriters(), which happens after the general conversion is done.
Could you please share more details of your latest error, such as the model or code, so I can debug it locally?
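For a quick sanity check, here is a minimal, self-contained sketch (all names illustrative, not from the model above) that exercises both tf.cond and tf.while_loop and can be fed to tf2onnx via the SavedModel path:

```python
import tensorflow as tf

class LoopDemo(tf.Module):
    @tf.function(input_signature=[tf.TensorSpec([], tf.int32)])
    def __call__(self, n):
        n = tf.cond(n < 1, lambda: tf.constant(1), lambda: n)   # becomes ONNX If
        i = tf.constant(0)
        ys = tf.zeros([0], dtype=tf.int32)

        def cond(i, ys):
            return i < n

        def body(i, ys):
            return i + 1, tf.concat([ys, tf.expand_dims(i, 0)], axis=0)

        _, ys = tf.while_loop(cond, body, (i, ys),               # becomes ONNX Loop
                              shape_invariants=(tf.TensorShape([]),
                                                tf.TensorShape([None])))
        return ys

m = LoopDemo()
tf.saved_model.save(m, "loop_demo", signatures=m.__call__.get_concrete_function())
# python3 -m tf2onnx.convert --saved-model loop_demo --output loop_demo.onnx --opset 13
```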
Thanks a lot. The model is a little big and cannot be attached directly here; you can download it from my own project at this link:
https://git.xfj0.cn/https://github.com/yjiangling/Test/releases/download/1.0/full_model.zip
And the code is displayed below:
```python
@tf.function
def l2r_greedy_decode(self, xs, xlen, training=False, parallel_iterations=32, swap_memory=False):
    batch_size = tf.shape(xs)[0]
    if self.hp.add_padding:
        xs = tf.pad(xs, paddings=[[0, 0], [0, self.hp.pad_len * 4], [0, 0]])
    xs, xlen = self.sub_sampling(xs, xlen, training)
    chunk_mask = self.encoder_mask(xlen, tf.shape(xs)[1])
    memory = self.encoder(xs, chunk_mask, training)
    memory = memory[:, :-self.hp.pad_len, :]
    init_beam = self.hp.sos_token * tf.ones([batch_size, 1], dtype=tf.int32)

    def _loop_cond(j, ys):
        return (j > self.hp.eos_token) & (tf.shape(ys)[1] < 40)

    def _decode_step(j, ys):
        ylen = tf.ones(batch_size, dtype=tf.int32) * tf.shape(ys)[-1]
        left_mask, combine_mask = self.decoder_mask(xlen, ylen)
        output = self.l2r_decoder(ys, memory, left_mask, combine_mask, training)
        ys = tf.cast(tf.math.argmax(output, axis=-1), dtype=tf.int32)
        k = tf.math.reduce_max(tf.math.reduce_min(ys, axis=-1))
        ys = tf.concat([init_beam, ys], axis=-1)
        return k, ys

    _, ys = tf.while_loop(cond=_loop_cond,
                          body=_decode_step,
                          loop_vars=(self.hp.vocab_size2, init_beam),
                          parallel_iterations=parallel_iterations,
                          shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None])),
                          swap_memory=swap_memory)
    return ys
```
I have verified that the encoder part (returning only the encoder result "memory") is OK, but when I add the decoder to decode step by step, it gives the error message mentioned before. The decoder mask function and decoder network are like this:
```python
class DecoderAttentionMask(tf.keras.layers.Layer):
    def __init__(self, look_ahead, **kwargs):
        super(DecoderAttentionMask, self).__init__()
        self.look_ahead = look_ahead

    @tf.function
    def call(self, xlen, ylen):
        size = tf.math.reduce_max(ylen)
        decoder_look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, self.look_ahead)
        decoder_padding_mask = 1 - tf.sequence_mask(ylen, maxlen=size, dtype=tf.float32)
        decoder_future_mask = tf.math.maximum(decoder_look_ahead_mask, decoder_padding_mask[:, tf.newaxis, tf.newaxis, :])
        mlen = tf.math.reduce_max(xlen)
        encoder_padding_mask = tf.sequence_mask(xlen, maxlen=mlen, dtype=tf.float32)[:, tf.newaxis, tf.newaxis, :]
        decoder_combine_mask = 1 - tf.linalg.matmul(1 - decoder_padding_mask[:, tf.newaxis, :, tf.newaxis], encoder_padding_mask)
        return decoder_future_mask, decoder_combine_mask


class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, drop_rate=0.1):
        super(DecoderLayer, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = PointWiseFeedForwardNetwork(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.dropout1 = tf.keras.layers.Dropout(drop_rate)
        self.dropout2 = tf.keras.layers.Dropout(drop_rate)
        self.dropout3 = tf.keras.layers.Dropout(drop_rate)

    @tf.function
    def call(self, inputs, memory, left_mask, combine_mask, training):
        att_input1 = self.layernorm1(inputs)
        att_out1, _ = self.mha1(att_input1, att_input1, att_input1, left_mask)
        out1 = inputs + self.dropout1(att_out1, training=training)
        att_input2 = self.layernorm2(out1)
        att_out2, _ = self.mha2(att_input2, memory, memory, combine_mask)
        out2 = out1 + self.dropout2(att_out2, training=training)
        ffn_input = self.layernorm3(out2)
        ffn_out = self.ffn(ffn_input)
        out3 = out2 + self.dropout3(ffn_out, training=training)
        return out3
```
Following the instructions, I changed the command for converting the pb model to ONNX to this:
python3 -m tf2onnx.convert --graphdef logs/model.pb --output logs/model.onnx --inputs xs:0[-1,-1],xlen:0[-1] --outputs Identity:0
And I will write a brief code to produce the pb model if it's needed.
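For reference, producing such a pb from a TF2 tf.function usually looks roughly like this (a sketch only; `model` and the input specs are placeholders for the actual Conformer module):

```python
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

# "model" is a placeholder for the Conformer module; the specs below assume
# float32 features xs of shape [batch, time, feat] and int32 lengths xlen.
concrete = model.l2r_greedy_decode.get_concrete_function(
    tf.TensorSpec([None, None, None], tf.float32),  # xs
    tf.TensorSpec([None], tf.int32),                # xlen
)
frozen = convert_variables_to_constants_v2(concrete)
tf.io.write_graph(frozen.graph.as_graph_def(), "logs", "model.pb", as_text=False)
```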
For this full_model.pb, I got an error like the one below, which is different from what you attached before. Is this the same error you are seeing right now?
```
2023-12-28 18:56:07,151 - WARNING - Shape of placeholder 'xlen' is unknown, treated it as a scalar. Please use the --inputs flag and append the shape to the input name if this input is not a scalar.
Traceback (most recent call last):
  File "/home/jay/anaconda3/envs/tf2onnx-debug/lib/python3.10/site-packages/tensorflow/python/framework/importer.py", line 510, in _import_graph_def_internal
    results = c_api.TF_GraphImportGraphDefWithResults(
tensorflow.python.framework.errors_impl.InvalidArgumentError: Index out of range using input dim 1; input has only 1 dims for '{{node StatefulPartitionedCall/chunk_attention_mask/PartitionedCall/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_FLOAT, _output_shapes=[], begin_mask=9, ellipsis_mask=0, end_mask=9, new_axis_mask=6, shrink_axis_mask=0](StatefulPartitionedCall/chunk_attention_mask/PartitionedCall/sub, StatefulPartitionedCall/chunk_attention_mask/PartitionedCall/strided_slice/stack, StatefulPartitionedCall/chunk_attention_mask/PartitionedCall/strided_slice/stack_1, StatefulPartitionedCall/chunk_attention_mask/PartitionedCall/strided_slice/stack_2)' with input shapes: [?], [4], [4], [4] and with computed input tensors: input[3] = <1 1 1 1>.
```
@fatcat-z
> This behavior is by design
I have a signal preprocessing layer which operates on complex-valued signals. I had been running into similar problems, and ended up reading the code in signal.py. I was quite surprised to find that it does not use the ONNX `op.DFT`, which specifies a whole gamut of [I]FFT ops, and instead implements only a small subset of FFT routines, rather inefficiently, apparently to avoid using `op.DFT` at any performance cost.
I'm wondering why such a design decision was made. AFAIK, Microsoft's ONNX Runtime supports the full op set. Are there any concerns about using this op in ONNX? Superficially, it seems to be well-defined in the ONNX spec.
The ONNX DFT op has only been supported since opset 17, while the current implementation in tf2onnx was done a couple of years ago.
Your suggestion is great, and we can probably add a new version (opset 17 and above) for those ops in signal.py that calls the DFT op to improve performance. Your contributions are definitely welcome!
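For reference, a minimal sketch (not tf2onnx code; the graph and names below are purely illustrative) of how an RFFT maps onto the opset-17 DFT op: the real input gets a trailing size-1 dimension, and onesided=1 keeps the non-redundant half of the spectrum.

```python
import numpy as np
import onnx
from onnx import TensorProto, helper

N = 16  # assumed frame length for this illustration
node = helper.make_node("DFT", inputs=["x"], outputs=["y"], axis=1, onesided=1)
graph = helper.make_graph(
    [node],
    "rfft_via_dft",
    [helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, N, 1])],
    [helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, N // 2 + 1, 2])],
)
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
onnx.checker.check_model(model)

import onnxruntime as ort
x = np.random.randn(1, N, 1).astype(np.float32)
sess = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
(y,) = sess.run(None, {"x": x})
# Compare against numpy's rfft: last output dim holds (real, imag) parts.
ref = np.fft.rfft(x[0, :, 0])
np.testing.assert_allclose(y[0, :, 0] + 1j * y[0, :, 1], ref, rtol=1e-4, atol=1e-4)
```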
@fatcat-z, thank you for the clarification! I cannot commit any time to this at the moment, or to anything at all; I'm already neck-deep in and late with my project. I'll have to rewrite the signal feature extraction in C++ for production deployment meanwhile, while sciencing on in TF. Realistically, I don't think I'll have any time to spare in the next 6 months. I think the best course of action is to open a separate issue, so maybe someone else can step in meanwhile.
If you don't mind my asking another question: the diagnostics listed TF's `Real` op as unsupported, the second of the two in addition to `IFFT`, but I don't dismiss the possibility that this is just an unexpected side effect of the other failures. If you recall, is it in fact supported or not? If it is indeed unimplemented, I'll open an issue for that one too (and its `Imag` counterpart). It's a simple, shape-preserving pointwise op that takes the real part of a complex-valued tensor.
No, Real and IFFT are not supported yet.
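For anyone hitting this, these are the TF-side ops in question (a tiny illustrative snippet, not tf2onnx code); a graph containing these nodes currently fails during conversion:

```python
import tensorflow as tf

z = tf.complex([1.0, 2.0], [3.0, -4.0])
re = tf.math.real(z)    # TF "Real" op
im = tf.math.imag(z)    # TF "Imag" op
zi = tf.signal.ifft(z)  # TF "IFFT" op
```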
Sorry for the late reply. I carefully checked the code and got rid of the error you met. Here are the new model and code; could you please help me check it again? Thanks a lot!
By the way, I use the following command to convert the pb model to an ONNX model:
python3 -m tf2onnx.convert --graphdef logs/model.pb --output logs/model.onnx --inputs xs:0[1,-1,512],xlen:0[1] --outputs Identity:0
The code is displayed below:
```python
def l2r_greedy_decode(self, xs, xlen, training=False, parallel_iterations=32, swap_memory=False):
    batch_size = tf.shape(xs)[0]
    init_beam = self.hp.sos_token * tf.ones([batch_size, 1], dtype=tf.int32)

    def _loop_cond(j, ys):
        return (j > self.hp.eos_token) & (tf.shape(ys)[1] < 40)

    def _decode_step(j, ys):
        ylen = tf.ones(batch_size, dtype=tf.int32) * tf.shape(ys)[-1]
        left_mask, combine_mask = self.decoder_mask(xlen, ylen)
        output = self.l2r_decoder(ys, xs, left_mask, combine_mask, training)
        ys = tf.cast(tf.math.argmax(output, axis=-1), dtype=tf.int32)
        k = tf.math.reduce_max(tf.math.reduce_min(ys, axis=-1))
        ys = tf.concat([init_beam, ys], axis=-1)
        return k, ys

    _, ys = tf.while_loop(cond=_loop_cond,
                          body=_decode_step,
                          loop_vars=(self.hp.vocab_size2, init_beam),
                          parallel_iterations=parallel_iterations,
                          shape_invariants=(tf.TensorShape([]), tf.TensorShape([None, None])),
                          swap_memory=swap_memory)
    return ys
```
The op tf.linalg.band_part is not supported by TensorRT, so I changed some code; a quick equivalence check follows the mask class below:
```python
class DecoderAttentionMask(tf.keras.layers.Layer):
    def __init__(self, look_ahead, **kwargs):
        super(DecoderAttentionMask, self).__init__()
        self.look_ahead = look_ahead

    @tf.function
    def call(self, xlen, ylen):
        size = tf.math.reduce_max(ylen)
        # decoder_look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, self.look_ahead)
        decoder_look_ahead_mask = 1 - tf.sequence_mask(tf.range(size) + 1 + self.look_ahead, maxlen=size, dtype=tf.float32)
        decoder_padding_mask = 1 - tf.sequence_mask(ylen, maxlen=size, dtype=tf.float32)
        decoder_future_mask = tf.math.maximum(decoder_look_ahead_mask, decoder_padding_mask[:, tf.newaxis, tf.newaxis, :])
        mlen = tf.math.reduce_max(xlen)
        encoder_padding_mask = tf.sequence_mask(xlen, maxlen=mlen, dtype=tf.float32)[:, tf.newaxis, tf.newaxis, :]
        decoder_combine_mask = 1 - tf.linalg.matmul(1 - decoder_padding_mask[:, tf.newaxis, :, tf.newaxis], encoder_padding_mask)
        return decoder_future_mask, decoder_combine_mask
```
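A quick check (purely illustrative) that the sequence_mask formulation produces the same look-ahead mask as the original band_part call:

```python
import tensorflow as tf

# Both constructions keep, in row i, the columns j <= i + look_ahead.
size, look_ahead = 8, 2
band = tf.linalg.band_part(tf.ones((size, size)), -1, look_ahead)
seq = tf.sequence_mask(tf.range(size) + 1 + look_ahead, maxlen=size, dtype=tf.float32)
tf.debugging.assert_equal(band, seq)
```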
```python
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, drop_rate=0.1):
        super(DecoderLayer, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = PointWiseFeedForwardNetwork(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-12)
        self.dropout1 = tf.keras.layers.Dropout(drop_rate)
        self.dropout2 = tf.keras.layers.Dropout(drop_rate)
        self.dropout3 = tf.keras.layers.Dropout(drop_rate)

    @tf.function
    def call(self, inputs, memory, left_mask, combine_mask, training):
        att_input1 = self.layernorm1(inputs)
        att_out1, _ = self.mha1(att_input1, att_input1, att_input1, left_mask)
        out1 = inputs + self.dropout1(att_out1, training=training)
        att_input2 = self.layernorm2(out1)
        att_out2, _ = self.mha2(att_input2, memory, memory, combine_mask)
        out2 = out1 + self.dropout2(att_out2, training=training)
        ffn_input = self.layernorm3(out2)
        ffn_out = self.ffn(ffn_input)
        out3 = out2 + self.dropout3(ffn_out, training=training)
        return out3
```
The new model is put here:
https://gh.ddlc.top/https://github.com/yjiangling/Test/releases/download/1.0/model.zip