
16x8 Quantization fails for RNN model - Max and min for dynamic tensors should be recorded during calibration

Open Black3rror opened this issue 1 year ago • 4 comments

Doing 16x8 quantization on RNN models fails.

Code to reproduce the issue (also available as a gist to reproduce the issue on Google Colab):

import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot

def create_model():
  model = tf.keras.models.Sequential()

  # For the model to be convertible later, batch_size and sequence_length must be fixed.
  # E.g., batch_input_shape=[None, 10] will throw an error.
  # This limitation is specific to RNNs; e.g., FC or CNN models can use batch_size=None.
  model.add(tf.keras.layers.Embedding(
    input_dim=5,
    output_dim=16,
    batch_input_shape=[1, 10]
  ))

  model.add(tf.keras.layers.SimpleRNN(
    units=8,
    return_sequences=True,
    stateful=False
  ))

  model.add(tf.keras.layers.Dense(5))

  return model

model = create_model()
model.summary()

model.save("/content/model/")

representative_data = np.random.randint(0, 5, (200, 10)).astype(np.float32)
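# 200 random sequences of length 10; values in [0, 5) match the Embedding's
# input_dim, and the float32 cast matches the saved model's (float32) input signature.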

def representative_dataset():
  for sample in representative_data:
    sample = np.expand_dims(sample, axis=0)     # batch_size = 1
    yield [sample]                              # set sample as first (and only) input of the model

# 16x8 quantization - Fail
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
# Commenting the following line will remove the error
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-b8b7cfec032a> in <cell line: 7>()
      5 # Commenting the following line will remove the error
      6 converter.representative_dataset = representative_dataset
----> 7 tflite_quant_model = converter.convert()

10 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in wrapper(self, *args, **kwargs)
    960   def wrapper(self, *args, **kwargs):
    961     # pylint: disable=protected-access
--> 962     return self._convert_and_export_metrics(convert_func, *args, **kwargs)
    963     # pylint: enable=protected-access
    964 

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
    938     self._save_conversion_params_metric()
    939     start_time = time.process_time()
--> 940     result = convert_func(self, *args, **kwargs)
    941     elapsed_time_ms = (time.process_time() - start_time) * 1000
    942     if result:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in convert(self)
   1245           graph_def)
   1246 
-> 1247     return self._convert_from_saved_model(graph_def)
   1248 
   1249 

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _convert_from_saved_model(self, graph_def)
   1129 
   1130     result = _convert_saved_model(**converter_kwargs)
-> 1131     return self._optimize_tflite_model(
   1132         result, quant_mode, quant_io=self.experimental_new_quantizer)
   1133 

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _optimize_tflite_model(self, model, quant_mode, quant_io)
    897         q_allow_float = quant_mode.is_allow_float()
    898         q_variable_quantization = quant_mode.enable_mlir_variable_quantization
--> 899         model = self._quantize(model, q_in_type, q_out_type, q_activations_type,
    900                                q_bias_type, q_allow_float,
    901                                q_variable_quantization)

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py in _quantize(self, result, input_type, output_type, activations_type, bias_type, allow_float, enable_variable_quantization)
    652           enable_variable_quantization=enable_variable_quantization)
    653     else:
--> 654       return calibrate_quantize.calibrate_and_quantize(
    655           self.representative_dataset.input_gen,
    656           input_type,

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/optimize/calibrator.py in calibrate_and_quantize(self, dataset_gen, input_type, output_type, allow_float, activations_type, bias_type, resize_input, disable_per_channel)
    174     """
    175     self._feed_tensors(dataset_gen, resize_input)
--> 176     return self._calibrator.QuantizeModel(
    177         np.dtype(input_type.as_numpy_dtype()).num,
    178         np.dtype(output_type.as_numpy_dtype()).num, allow_float,

RuntimeError: Max and min for dynamic tensors should be recorded during calibration: Failed for tensor arg1
Empty min/max for tensor arg1
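
For context: the calibrator records a min/max range for every tensor it quantizes, and the error says no range was ever recorded for tensor arg1. That tensor is presumably an extra input introduced when the RNN is lowered into the converted graph (e.g., the recurrent state), which the representative dataset never feeds. Consistent with the comment in the snippet above, skipping calibration makes the error go away. A minimal sketch for comparison (dynamic-range quantization, which quantizes weights to int8 but leaves activations in float, so it needs no representative dataset or calibration):

import tensorflow as tf

# Dynamic-range quantization: no representative_dataset is set, so the
# calibrator never runs and "Empty min/max" cannot occur. This yields int8
# weights with float activations, i.e. a diagnostic fallback, not real 16x8.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_dynamic_model = converter.convert()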

Black3rror · Aug 30 '23 12:08

@yyoon Could you please check? Thanks!

cdh4696 · Aug 31 '23 01:08

@tucan9389 could you take a look? Not sure if RNN is supported at all.

yyoon · Sep 02 '23 06:09

Any update?

Black3rror · Jan 27 '24 16:01

Hello, I'm facing the same problem. Is there any update on how to deal with this error?

HajarAva · Apr 15 '24 12:04