
BatchNormalization Contributes to NaN Loss after First Epoch.

NikoMolecule opened this issue · 2 comments

Hello, I am building a CNN model, and every time I add BatchNormalization the loss becomes NaN after the first epoch. Is there any workaround for this problem? Changing the learning_rate doesn't affect the NaN loss at all.

initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
)
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
with tpu_strategy.scope():
    modelCNN = tf.keras.Sequential()
    
    modelCNN.add(tf.keras.layers.InputLayer(input_shape=(256, 256, 1)))
    modelCNN.add(tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255))
    
    modelCNN.add(tf.keras.layers.Conv2D(16, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 256 x 256 x 1 -> 126 x 126 x 16
    
    modelCNN.add(tf.keras.layers.Conv2D(32, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 126 x 126 x 16 -> 61 x 61 x 32
    
    modelCNN.add(tf.keras.layers.Conv2D(64, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 61 x 61 x 32 -> 29 x 29 x 64
    
    modelCNN.add(tf.keras.layers.Conv2D(128, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 29 x 29 x 64 -> 12 x 12 x 128
    
    modelCNN.add(tf.keras.layers.Conv2D(256, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 12 x 12 x 128 -> 3 x 3 x 256
    
    modelCNN.add(tf.keras.layers.Flatten())
    modelCNN.add(tf.keras.layers.Dense(784, activation = 'relu'))
    modelCNN.add(tf.keras.layers.Dense(49, activation = 'relu'))
    
    modelCNN.add(tf.keras.layers.Dense(1, activation = 'linear'))
    modelCNN.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate), loss='mean_squared_error', metrics=['mae'])
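
For completeness, the training call looks roughly like this; train_ds and val_ds stand in for the actual dataset, which isn't shown here:

modelCNN.fit(
    train_ds,                                  # placeholder for the training data
    validation_data=val_ds,                    # placeholder for the validation data
    epochs=50,                                 # illustrative epoch count
    callbacks=[lr_callback, early_stopping],
)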

NikoMolecule commented on Jan 21, 2024

@NikoMolecule,

Could you please try the code with Keras 3? You can install Keras 3 with pip install -U keras, import keras directly, and let us know the outcome.
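
For a quick check that Keras 3 is actually the version being imported, you can print the installed version (the exact version string will vary):

pip install -U keras

import keras
print(keras.__version__)  # expect a 3.x version string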

Below is the converted code:

import keras

initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
)
lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)

early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
with tpu_strategy.scope():
    modelCNN = keras.Sequential()
    
    modelCNN.add(keras.layers.InputLayer(shape=(256, 256, 1)))  # Keras 3 uses shape instead of input_shape
    modelCNN.add(keras.layers.Rescaling(scale=1./255))
    
    modelCNN.add(keras.layers.Conv2D(16, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 256 x 256 x 1 -> 126 x 126 x 16
    
    modelCNN.add(keras.layers.Conv2D(32, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 126 x 126 x 16 -> 61 x 61 x 32
    
    modelCNN.add(keras.layers.Conv2D(64, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 61 x 61 x 32 -> 29 x 29 x 64
    
    modelCNN.add(keras.layers.Conv2D(128, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 29 x 29 x 64 -> 12 x 12 x 128
    
    modelCNN.add(keras.layers.Conv2D(256, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 12 x 12 x 128 -> 3 x 3 x 256
    
    modelCNN.add(keras.layers.Flatten())
    modelCNN.add(keras.layers.Dense(784, activation = 'relu'))
    modelCNN.add(keras.layers.Dense(49, activation = 'relu'))
    
    modelCNN.add(keras.layers.Dense(1, activation = 'linear'))
    modelCNN.compile(optimizer=keras.optimizers.Adam(learning_rate=initial_learning_rate), loss='mean_squared_error', metrics=['mae'])
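
If the NaN loss persists on Keras 3, two common mitigations are verifying that the data itself contains no NaN/Inf values and enabling gradient clipping in the optimizer. A minimal sketch, assuming x_train and y_train are the arrays from your pipeline (they are not defined in the code above):

import numpy as np

# Rule out invalid values in the inputs and targets, a frequent cause of NaN loss.
print(np.isnan(x_train).any(), np.isinf(x_train).any())
print(np.isnan(y_train).any(), np.isinf(y_train).any())

# Clip the gradient norm so exploding gradients cannot push the
# BatchNormalization statistics and weights to NaN.
modelCNN.compile(
    optimizer=keras.optimizers.Adam(learning_rate=initial_learning_rate, clipnorm=1.0),
    loss='mean_squared_error',
    metrics=['mae'],
)

Keras optimizers also accept clipvalue and global_clipnorm if a different clipping scheme is preferred.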

sachinprasadhs commented on Jan 22, 2024

This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.

github-actions[bot] commented on Feb 6, 2024

This issue was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further.

github-actions[bot] commented on Feb 21, 2024
