16x8 Quantization fails for RNN model - Max and min for dynamic tensors should be recorded during calibration
Post-training 16x8 quantization (int16 activations with int8 weights) fails for RNN models.
Code to reproduce the issue (also available as a gist reproducing the issue on Google Colab):
```python
import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot


def create_model():
    model = tf.keras.models.Sequential()
    # For the model to be convertible later, batch_size and sequence_length must be fixed.
    # E.g., batch_input_shape=[None, 10] will throw an error.
    # This is a limitation specific to RNNs; for FC or CNN layers, batch_size=None is fine.
    model.add(tf.keras.layers.Embedding(
        input_dim=5,
        output_dim=16,
        batch_input_shape=[1, 10]
    ))
    model.add(tf.keras.layers.SimpleRNN(
        units=8,
        return_sequences=True,
        stateful=False
    ))
    model.add(tf.keras.layers.Dense(5))
    return model


model = create_model()
model.summary()
model.save("/content/model/")

representative_data = np.random.randint(0, 5, (200, 10)).astype(np.float32)


def representative_dataset():
    for sample in representative_data:
        sample = np.expand_dims(sample, axis=0)  # batch_size = 1
        yield [sample]  # the sample is the first (and only) input of the model


# 16x8 quantization - fails
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
# Commenting out the following line removes the error
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()
```
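As a possible workaround (a sketch based on the TFLite post-training quantization docs, not a confirmed fix for this issue): adding `tf.lite.OpsSet.TFLITE_BUILTINS` to `supported_ops` lets the converter fall back to float32 kernels for ops that the 16x8 mode cannot handle, at the cost of a partially quantized model. Whether this avoids the calibration error for the RNN's state tensors is untested here.

```python
# Hedged workaround sketch: permit float32 fallback for ops that the 16x8
# quantization mode cannot calibrate. The model is then only partially quantized.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,  # fall back to float32 kernels where needed
]
converter.representative_dataset = representative_dataset
tflite_fallback_model = converter.convert()
```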
Error:
```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-b8b7cfec032a> in <cell line: 7>()
      5 # Commenting the following line will remove the error
      6 converter.representative_dataset = representative_dataset
----> 7 tflite_quant_model = converter.convert()

10 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in wrapper(self, *args, **kwargs)
    960   def wrapper(self, *args, **kwargs):
    961     # pylint: disable=protected-access
--> 962     return self._convert_and_export_metrics(convert_func, *args, **kwargs)
    963     # pylint: enable=protected-access
    964

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
    938     self._save_conversion_params_metric()
    939     start_time = time.process_time()
--> 940     result = convert_func(self, *args, **kwargs)
    941     elapsed_time_ms = (time.process_time() - start_time) * 1000
    942     if result:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in convert(self)
   1245           graph_def)
   1246
-> 1247     return self._convert_from_saved_model(graph_def)
   1248
   1249

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _convert_from_saved_model(self, graph_def)
   1129
   1130     result = _convert_saved_model(**converter_kwargs)
-> 1131     return self._optimize_tflite_model(
   1132         result, quant_mode, quant_io=self.experimental_new_quantizer)
   1133

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    213     except Exception as error:
    214       report_error_message(str(error))
--> 215       raise error from None  # Re-throws the exception.
    216
    217   return wrapper

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    203   def wrapper(*args, **kwargs):
    204     try:
--> 205       return func(*args, **kwargs)
    206     except ConverterError as converter_error:
    207       if converter_error.errors:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _optimize_tflite_model(self, model, quant_mode, quant_io)
    897       q_allow_float = quant_mode.is_allow_float()
    898       q_variable_quantization = quant_mode.enable_mlir_variable_quantization
--> 899       model = self._quantize(model, q_in_type, q_out_type, q_activations_type,
    900                              q_bias_type, q_allow_float,
    901                              q_variable_quantization)

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _quantize(self, result, input_type, output_type, activations_type, bias_type, allow_float, enable_variable_quantization)
    652           enable_variable_quantization=enable_variable_quantization)
    653     else:
--> 654       return calibrate_quantize.calibrate_and_quantize(
    655           self.representative_dataset.input_gen,
    656           input_type,

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    213     except Exception as error:
    214       report_error_message(str(error))
--> 215       raise error from None  # Re-throws the exception.
    216
    217   return wrapper

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    203   def wrapper(*args, **kwargs):
    204     try:
--> 205       return func(*args, **kwargs)
    206     except ConverterError as converter_error:
    207       if converter_error.errors:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/optimize/calibrator.py in calibrate_and_quantize(self, dataset_gen, input_type, output_type, allow_float, activations_type, bias_type, resize_input, disable_per_channel)
    174     """
    175     self._feed_tensors(dataset_gen, resize_input)
--> 176     return self._calibrator.QuantizeModel(
    177         np.dtype(input_type.as_numpy_dtype()).num,
    178         np.dtype(output_type.as_numpy_dtype()).num, allow_float,

RuntimeError: Max and min for dynamic tensors should be recorded during calibration: Failed for tensor arg1
Empty min/max for tensor arg1
```
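The failing tensor `arg1` appears to be a graph input argument introduced for the RNN's hidden state, for which the calibrator never records min/max values. One way to narrow the problem down (a sketch reusing the model and `representative_dataset` from above) is to try the standard full-integer int8 path; if that converts, the failure is specific to the 16x8 activation mode rather than to RNN calibration in general:

```python
# Baseline check (sketch): standard full-integer int8 post-training quantization
# of the same SavedModel. If this converts, the failure is specific to 16x8 mode.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = representative_dataset
tflite_int8_model = converter.convert()
```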
@yyoon Could you please check? Thanks!
@tucan9389 could you take a look? Not sure if RNN is supported at all.
Any update?
Hello, I'm facing the same problem. Is there any update on how to deal with this error?