model-optimization
Quantize naive !!!
Hi all, I am working on quantization. I have a .h5 model, and I want to convert its weights from float32 to int8 or float16. This seems to be "post-training quantization". How can I do that without converting to TFLite, i.e. still saving the result as a .h5 model?
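To make the goal concrete, the naive version of what I am after would be something like this (just a sketch of the intent; the file names are placeholders, and as far as I can tell `set_weights()` casts the arrays straight back to the variables' float32 dtype, so this alone does not work):

    import tensorflow as tf

    # Naive idea: load the .h5 model, cast every weight array to float16,
    # and save it back out as .h5.
    model = tf.keras.models.load_model("my_model.h5")  # placeholder path
    fp16_weights = [w.astype("float16") for w in model.get_weights()]
    model.set_weights(fp16_weights)  # Keras casts these back to float32
    model.save("my_model_fp16.h5")   # placeholder path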
I tried:

    import os, argparse, json, cv2

    # Import necessary items from Keras
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Activation, Dropout, UpSampling2D
    from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D
    from tensorflow.keras.layers import BatchNormalization
    from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

    # Import local packages
    import tensorflow_model_optimization as tfmot
    import tensorflow as tf
    from tensorflow.keras.models import model_from_json

    DEBUG = False
    LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
    MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

    def apply_quantization(skip_layers):
        # Returns a clone_function that annotates every layer for quantization,
        # except those whose type appears in skip_layers.
        def wrapper(layer):
            if type(layer) in skip_layers:
                print(layer.name)
                return layer
            else:
                return tfmot.quantization.keras.quantize_annotate_layer(layer)
        return wrapper

    # def wrapper(layer):
    #     if type(layer) in skip_layers:
    #         print(layer.name)
    #         return tfmot.quantization.keras.quantize_annotate_layer(layer)
    #     else:
    #         return layer
    # return wrapper
    class DefaultQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
        def get_weights_and_quantizers(self, layer):
            return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]

        def get_activations_and_quantizers(self, layer):
            return [(layer.activation, MovingAverageQuantizer(num_bits=4, symmetric=False, narrow_range=False, per_axis=False))]

        def set_quantize_weights(self, layer, quantize_weights):
            layer.kernel = quantize_weights[0]

        def set_quantize_activations(self, layer, quantize_activations):
            layer.activation = quantize_activations[0]

        def get_output_quantizers(self, layer):
            return [tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
                num_bits=4, per_axis=False, symmetric=False, narrow_range=False)]

        def get_config(self):
            return {}
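    # Note: as far as I understand the API, a custom config would normally be
    # attached per layer via
    # quantize_annotate_layer(layer, quantize_config=DefaultQuantizeConfig());
    # the wrapper above only annotates layers with the default config.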
"""
# Configure how to quantize weights.
def get_weights_and_quantizers(self, layer):
return [(layer.kernel, LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False))]
# Configure how to quantize activations.
def get_activations_and_quantizers(self, layer):
return [
(layer.activation, MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]
def set_quantize_weights(self, layer, quantize_weights):
# Add this line for each item returned in `get_weights_and_quantizers`
# , in the same order
layer.kernel = quantize_weights[0]
def set_quantize_activations(self, layer, quantize_activations):
# Add this line for each item returned in `get_activations_and_quantizers`
# , in the same order.
layer.activation = quantize_activations[0]
# Configure how to quantize outputs (may be equivalent to activations).
def get_output_quantizers(self, layer):
return []
def get_config(self):
return {}
"""
    if __name__ == '__main__':
        input_shape = (320, 320, 3)

        with open("path/to/1_model_quantize.json") as f:
            json_model = f.read()
        model = model_from_json(json_model)
        model.load_weights("path to h5")
        model.summary()

        # Quantize
        quantize_model = tfmot.quantization.keras.quantize_model
        # q_aware stands for quantization aware.
        # q_aware_model = quantize_model(model)
        q_aware_model = tf.keras.models.clone_model(
            model, clone_function=apply_quantization(skip_layers=[BatchNormalization]))

        with tfmot.quantization.keras.quantize_scope({'DefaultQuantizeConfig': DefaultQuantizeConfig}):
            quant_aware_model = tfmot.quantization.keras.quantize_apply(q_aware_model)

        # `quantize_apply` requires a recompile.
        quant_aware_model.compile(optimizer='Adam', loss='mean_squared_error',
                                  metrics=['mean_squared_error', 'accuracy'])

        quantize_file = "save quantize .h5"
        quant_aware_model.summary()
        tf.keras.models.save_model(quant_aware_model, quantize_file, include_optimizer=False)
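For reference, this is roughly how I checked the result afterwards (just a quick inspection snippet appended to the script above):

    # Print the dtype of every weight in the quantization-aware model.
    for w in quant_aware_model.weights:
        print(w.name, w.dtype)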
The result is that every layer is still in float32. Thank you so much.