problem with quantizing the BN layer
Hello, I am trying to perform QAT on a ResNet50 network with BN layers, and I keep getting the following error:
ValueError: Shape must be rank 4 but is rank 5 for '{{node batch_normalization_8/FusedBatchNormV3}} = FusedBatchNormV3[T=DT_FLOAT, U=DT_FLOAT, data_format="NHWC", epsilon=0.001, exponential_avg_factor=1, is_training=false](Placeholder, batch_normalization_8/ReadVariableOp, batch_normalization_8/ReadVariableOp_1, batch_normalization_8/FusedBatchNormV3/ReadVariableOp, batch_normalization_8/FusedBatchNormV3/ReadVariableOp_1)' with input shapes: [1,?,7,7,64], [64], [64], [64], [64].
I tried to isolate each of the BN layers, and it appears they all cause the same error.
Here is the code I am trying to run:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import datetime as dt
import h5py
import math
import pandas as pd
import scipy as sci
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers
from tensorflow.keras import activations
from tensorflow.keras import Model
from tensorflow.keras.layers import ZeroPadding2D,Add,Dense,Flatten,AveragePooling2D,Conv2D,BatchNormalization,MaxPooling2D,Activation,Input,Dropout
from IPython.display import SVG
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import model_to_dot
from tensorflow.python.keras.regularizers import Regularizer
import tensorflow_model_optimization as tfmot
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only use the first GPU
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
def res_identity(x, filters):
    # ResNet block in which the dimensions do not change.
    # The skip connection is a simple identity connection;
    # there are 3 conv blocks and then the input is added back.
    x_skip = x  # this will be used for addition with the residual block
    f1, f2 = filters

    # first block
    # Reg (the kernel regularizer) is assumed to be defined elsewhere in the original script
    x = Conv2D(f1, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer=Reg)(x)
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)

    # second block: bottleneck (size kept the same with padding)
    x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same', kernel_regularizer=Reg)(x)
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)

    # third block: activation applied after adding the input
    x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer=Reg)(x)
    x = BatchNormalization()(x)
    # x = Activation(activations.relu)(x)

    # add the input
    x = Add()([x, x_skip])
    x = tf.keras.activations.relu(x)
    return x
def res_conv(x, s, filters):
    '''Conv block: here the input size changes.'''
    x_skip = x
    f1, f2 = filters

    # first block
    x = Conv2D(f1, kernel_size=(1, 1), strides=(s, s), padding='valid', kernel_regularizer=Reg)(x)
    # when s = 2, this downsizes the feature map
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)

    # second block
    x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same', kernel_regularizer=Reg)(x)
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)

    # third block
    x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer=Reg)(x)
    x = BatchNormalization()(x)

    # shortcut
    x_skip = Conv2D(f2, kernel_size=(1, 1), strides=(s, s), padding='valid', kernel_regularizer=Reg)(x_skip)
    x_skip = BatchNormalization()(x_skip)

    # add
    x = Add()([x, x_skip])
    x = Activation(activations.relu)(x)
    return x
def resnet50():
    input_im = Input(shape=(32, 32, 3))
    x = ZeroPadding2D(padding=(3, 3))(input_im)

    # 1st stage: conv, BN, ReLU, then max pooling
    x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # 2nd stage: from here on only conv blocks and identity blocks, no pooling
    x = res_conv(x, s=1, filters=(64, 256))
    x = res_identity(x, filters=(64, 256))
    x = res_identity(x, filters=(64, 256))
    x = Dropout(0.15)(x)

    # 3rd stage
    x = res_conv(x, s=2, filters=(128, 512))
    x = res_identity(x, filters=(128, 512))
    x = res_identity(x, filters=(128, 512))
    x = res_identity(x, filters=(128, 512))
    x = Dropout(0.15)(x)

    # 4th stage
    x = res_conv(x, s=2, filters=(256, 1024))
    x = res_identity(x, filters=(256, 1024))
    x = res_identity(x, filters=(256, 1024))
    x = res_identity(x, filters=(256, 1024))
    x = res_identity(x, filters=(256, 1024))
    x = res_identity(x, filters=(256, 1024))
    x = Dropout(0.15)(x)

    # 5th stage
    x = res_conv(x, s=2, filters=(512, 2048))
    x = res_identity(x, filters=(512, 2048))
    x = res_identity(x, filters=(512, 2048))

    # ends with average pooling and a dense connection
    x = AveragePooling2D((2, 2), padding='same')(x)
    x = Flatten()(x)
    x = Dropout(0.1)(x)
    x = Dense(10, activation='softmax', kernel_initializer='he_normal', kernel_regularizer=Reg)(x)  # multi-class

    # define the model
    model = Model(inputs=input_im, outputs=x, name='Resnet50')
    return model
def main():
    num_classes = 10
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train_images = train_images.reshape((50000, 32, 32, 3)).astype("float32")
    test_images = test_images.reshape((10000, 32, 32, 3)).astype("float32")
    # Normalize pixel values to be between -1 and 1
    train_images, test_images = train_images / 127.5 - 1, test_images / 127.5 - 1
    train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
    test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)

    resNet50 = resnet50()
    ######################## quantization-aware training ######################
    resNet50 = tfmot.quantization.keras.quantize_model(resNet50)
    ############################################################################
    resNet50.compile(
        tf.keras.optimizers.Adam(lr=0.01, decay=0.0001),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    # `callbacks` is assumed to be defined elsewhere in the original script
    history = resNet50.fit(train_images, train_labels, batch_size=32, epochs=100,
                           callbacks=callbacks, validation_split=0.15)

if __name__ == "__main__":
    main()
What am I missing?
Hi @lovodkin93, can you share a Colab so we can reproduce this easily?
Also please let us know which TF and TF-MOT versions you're using.
sure thing: https://colab.research.google.com/drive/1o9hbgv4Toc59DTkM3h5PaBY1G1duiIdE?usp=sharing Thanks!
This is an issue that occurs when you use tf.nn.relu instead of tf.keras.layers.ReLU. (It gets converted to a TFOpLambda layer, which the current QAT API has trouble with.)
Would you please use tf.keras.layers.ReLU if that's okay?
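For illustration, the first block of res_identity above would then look something like this (a minimal sketch; Reg is the same regularizer assumed by the original snippet, and x and f1 come from the surrounding function):
from tensorflow.keras.layers import ReLU

# first block, using the Keras ReLU layer rather than a bare
# tf.keras.activations.relu(x) call on the tensor
x = Conv2D(f1, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer=Reg)(x)
x = BatchNormalization()(x)
x = ReLU()(x)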
So I tried to replace every x = Activation(activations.relu)(x) line with x = Activation(ReLU)(x), and now I get the following error (which is the reason I worked with x = Activation(activations.relu)(x) in the first place):
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
Do you happen to know why this might occur? Thanks!
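One possible explanation (an assumption, since the full traceback isn't shown): ReLU is a layer class, not an activation function, so Activation(ReLU) stores the class itself as the activation; when Keras later calls it on a tensor, the tensor ends up as a constructor argument and gets evaluated as a Python bool inside the layer constructor, which raises exactly this error in graph mode. A minimal sketch of the usual patterns instead:
from tensorflow.keras.layers import Activation, ReLU

# Option 1: instantiate the ReLU layer, then apply it to the tensor
x = ReLU()(x)

# Option 2: keep the Activation wrapper, but pass the activation by name
# x = Activation('relu')(x)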
Hi @Xhark,
Is it possible that this same error occurs when using tensorflow.keras.activations.sigmoid? If so, what replacement should I use, since there is no tf.keras.layers.sigmoid or similar? A tf.keras.layers.Lambda, perhaps?
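In case it helps, two possible replacements to sketch (illustrative only, not verified against the QAT converter here): the built-in Activation layer with the activation passed by name, or a small custom layer wrapping tf.sigmoid, similar in spirit to the SplitLayer workaround in the next reply.
from tensorflow.keras.layers import Activation, Layer

# Option 1: the built-in Activation layer, with the activation given by name
x = Activation('sigmoid')(x)

# Option 2: a custom layer wrapping tf.sigmoid
class SigmoidLayer(Layer):
    def call(self, inputs):
        return tf.sigmoid(inputs)

# x = SigmoidLayer()(x)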
I had a similar issue with tf.split. I could overcome the error by wrapping tf.split in a Keras layer:
@keras.saving.register_keras_serializable(package="MyLayers", name="SplitLayer")
class SplitLayer(keras.layers.Layer):
    def __init__(self, num_or_size_splits, axis, **kwargs):
        super(SplitLayer, self).__init__(**kwargs)
        self.num_or_size_splits = num_or_size_splits
        self.axis = axis

    def call(self, inputs):
        return tf.split(inputs, self.num_or_size_splits, axis=self.axis)

    def get_config(self):
        config = super(SplitLayer, self).get_config()
        config.update({
            'num_or_size_splits': self.num_or_size_splits,
            'axis': self.axis,
        })
        return config
tf version: 2.15.1 keras version: 2.15.0 tfmot version: 0.7.5
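A quick usage sketch of that SplitLayer (the input shape and split sizes here are illustrative, not taken from the thread):
# Split a 64-channel feature map into two 32-channel halves along the channel axis.
inputs = keras.Input(shape=(7, 7, 64))
left, right = SplitLayer(num_or_size_splits=2, axis=-1)(inputs)
model = keras.Model(inputs, [left, right])
model.summary()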