Unable to quantize to 4-bits
Prior to filing: check that this should be a bug instead of a feature request. Everything supported, including the compatible versions of TensorFlow, is listed in the overview page of each technique. For example, the overview page of quantization-aware training is here. An issue for anything not supported should be a feature request.
Describe the bug: I'm trying to quantize the Conv2D and Dense layers in my model to 4 bits, but after quantization the weights are not in the range -7 to 7. Kindly help me solve this issue.
System information
TensorFlow version (installed from source or binary): 2.8.0
TensorFlow Model Optimization version (installed from source or binary): 0.7.1
Python version:3.8.0
Describe the expected behavior
I expected the weights of the Conv2D and Dense layers to be in the range -7 to 7.
Describe the current behavior: The weights are instead in the range -128 to 127.
Code to reproduce the issue Provide a reproducible code that is the bare minimum necessary to generate the problem.
import tempfile import os
import tensorflow as tf import numpy as np import tensorflow_model_optimization as tfmot from keras import backend as K from tensorflow import keras
# Short aliases for the two tfmot quantizer classes used by the
# QuantizeConfig classes in this script.
_keras_quantizers = tfmot.quantization.keras.quantizers
LastValueQuantizer = _keras_quantizers.LastValueQuantizer
MovingAverageQuantizer = _keras_quantizers.MovingAverageQuantizer
class DefaultDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """QuantizeConfig that puts 4-bit quantizers on a layer's kernel and activation."""

    def get_weights_and_quantizers(self, layer):
        """Return the layer kernel paired with its weight quantizer.

        The kernel gets a symmetric 4-bit `LastValueQuantizer` with
        `narrow_range` and `per_axis` both disabled.
        """
        # NOTE(review): with narrow_range=False a signed 4-bit grid presumably
        # spans -8..7; the -7..7 range expected in this report would need
        # narrow_range=True -- confirm against the tfmot quantizer docs.
        kernel_quantizer = LastValueQuantizer(
            num_bits=4, symmetric=True, narrow_range=False, per_axis=False)
        return [(layer.kernel, kernel_quantizer)]

    def get_activations_and_quantizers(self, layer):
        """Return the layer activation paired with its activation quantizer.

        The activation gets an asymmetric 4-bit `MovingAverageQuantizer`.
        """
        activation_quantizer = MovingAverageQuantizer(
            num_bits=4, symmetric=False, narrow_range=False, per_axis=False)
        return [(layer.activation, activation_quantizer)]

    def set_quantize_weights(self, layer, quantize_weights):
        """Store the quantized kernel back on the layer.

        One assignment is needed per item returned by
        `get_weights_and_quantizers`, in the same order; only the kernel is
        returned there, so only index 0 is consumed.
        """
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        """Store the quantized activation back on the layer.

        One assignment is needed per item returned by
        `get_activations_and_quantizers`, in the same order; only the
        activation is returned there, so only index 0 is consumed.
        """
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        """Return the output quantizers; this config defines none."""
        return []

    def get_config(self):
        """Return the serializable config; this class has no settings to save."""
        return {}
# Short aliases for the tfmot annotation and scope helpers used when building
# and converting the model.
_keras_quantization = tfmot.quantization.keras
quantize_annotate_layer = _keras_quantization.quantize_annotate_layer
quantize_annotate_model = _keras_quantization.quantize_annotate_model
quantize_scope = _keras_quantization.quantize_scope
class ModifiedDenseQuantizeConfig(DefaultDenseQuantizeConfig):
    """DefaultDenseQuantizeConfig variant that overrides only the weight quantizer."""

    def get_weights_and_quantizers(self, layer):
        """Configure the kernel to quantize with 4 bits instead of 8 bits.

        Returns the layer kernel paired with a symmetric 4-bit
        `LastValueQuantizer` (`narrow_range` and `per_axis` disabled).
        """
        # NOTE(review): the parent class in this file already uses the same
        # 4-bit settings, so this override looks redundant -- confirm intent.
        four_bit_quantizer = LastValueQuantizer(
            num_bits=4, symmetric=True, narrow_range=False, per_axis=False)
        return [(layer.kernel, four_bit_quantizer)]
# Load the MNIST dataset.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input images so that each pixel value is between 0 and 1.
train_images, test_images = train_images / 255.0, test_images / 255.0
# Layer stack for the annotated model. The Conv2D and Dense layers are wrapped
# with quantize_annotate_layer so that they pick up ModifiedDenseQuantizeConfig.
annotated_layers = [
    tf.keras.layers.InputLayer(input_shape=(28, 28)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    # Un-annotated alternative:
    # tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
    quantize_annotate_layer(
        tf.keras.layers.Conv2D(
            filters=12, kernel_size=(3, 3), activation='relu'),
        ModifiedDenseQuantizeConfig()),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    # Pass in the modified QuantizeConfig to modify this Dense layer.
    quantize_annotate_layer(
        tf.keras.layers.Dense(10), ModifiedDenseQuantizeConfig()),
    # Un-annotated alternative:
    # tf.keras.layers.Dense(10),
    tf.keras.layers.Flatten(),
]
new_model = quantize_annotate_model(tf.keras.Sequential(annotated_layers))

# Train the annotated model for two epochs before quantize_apply is used.
new_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
new_model.fit(
    train_images,
    train_labels,
    epochs=2,
    validation_split=0.1,
)
new_model.summary()
# quantize_apply requires mentioning ModifiedDenseQuantizeConfig with
# quantize_scope.
custom_objects = {'ModifiedDenseQuantizeConfig': ModifiedDenseQuantizeConfig}
with quantize_scope(custom_objects):
    # Use quantize_apply to actually make the model quantization aware.
    quant_aware_model = tfmot.quantization.keras.quantize_apply(new_model)

# Train the quantization-aware model for two more epochs.
quant_aware_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
quant_aware_model.fit(
    train_images,
    train_labels,
    epochs=2,
    validation_split=0.1,
)
quant_aware_model.summary()
# Convert the quantization-aware Keras model to TFLite with the default
# optimizations enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the converted model to disk.
with open('No_prune_4bit.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)
Screenshots If applicable, add screenshots to help explain your problem.
Additional context Add any other context about the problem here.