BatchNormalization Contributes to NaN Loss after First Epoch
Hello, so I am coding my CNN model, and every time I add BatchNormalization, the loss becomes NaN after the first epoch. Is there any workaround for this problem? Changing the learning_rate doesn't affect the NaN loss at all!
import tensorflow as tf

initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
)
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# tpu_strategy is assumed to be defined earlier (e.g., tf.distribute.TPUStrategy)
with tpu_strategy.scope():
    modelCNN = tf.keras.Sequential()
    modelCNN.add(tf.keras.layers.InputLayer(input_shape=(256, 256, 1)))
    modelCNN.add(tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255))
    modelCNN.add(tf.keras.layers.Conv2D(16, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 256 x 256 x 1 -> 126 x 126 x 16
    modelCNN.add(tf.keras.layers.Conv2D(32, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 126 x 126 x 16 -> 61 x 61 x 32
    modelCNN.add(tf.keras.layers.Conv2D(64, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 61 x 61 x 32 -> 29 x 29 x 64
    modelCNN.add(tf.keras.layers.Conv2D(128, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 29 x 29 x 64 -> 12 x 12 x 128
    modelCNN.add(tf.keras.layers.Conv2D(256, (1, 1), activation='relu'))
    modelCNN.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(tf.keras.layers.BatchNormalization())
    modelCNN.add(tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 12 x 12 x 128 -> 3 x 3 x 256
    modelCNN.add(tf.keras.layers.Flatten())
    modelCNN.add(tf.keras.layers.Dense(784, activation='relu'))
    modelCNN.add(tf.keras.layers.Dense(49, activation='relu'))
    modelCNN.add(tf.keras.layers.Dense(1, activation='linear'))
    modelCNN.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate), loss='mean_squared_error', metrics=['mae'])
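For reference, one commonly suggested mitigation when the loss goes NaN regardless of the learning rate is gradient clipping on the optimizer, which keeps a single bad batch from blowing up the activations that BatchNormalization then normalizes. A minimal sketch; the clipnorm value is an assumption, not a tuned setting:

# Minimal sketch: clip gradient norms before the update step.
# clipnorm=1.0 is an assumed starting point; tune as needed.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=initial_learning_rate,
    clipnorm=1.0,
)
modelCNN.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])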
@NikoMolecule,
Could you please try the code in Keras 3? You can install Keras 3 with pip install -U keras and import keras directly; please let us know the outcome.
Below is the sample converted code:
import keras

initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
)
lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

with tpu_strategy.scope():
    modelCNN = keras.Sequential()
    # Keras 3 uses `shape` instead of the deprecated `input_shape` argument
    modelCNN.add(keras.layers.InputLayer(shape=(256, 256, 1)))
    modelCNN.add(keras.layers.Rescaling(scale=1./255))
    modelCNN.add(keras.layers.Conv2D(16, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 256 x 256 x 1 -> 126 x 126 x 16
    modelCNN.add(keras.layers.Conv2D(32, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 126 x 126 x 16 -> 61 x 61 x 32
    modelCNN.add(keras.layers.Conv2D(64, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 61 x 61 x 32 -> 29 x 29 x 64
    modelCNN.add(keras.layers.Conv2D(128, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.Conv2D(128, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 29 x 29 x 64 -> 12 x 12 x 128
    modelCNN.add(keras.layers.Conv2D(256, (1, 1), activation='relu'))
    modelCNN.add(keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.Conv2D(256, (3, 3), activation='relu'))
    modelCNN.add(keras.layers.BatchNormalization())
    modelCNN.add(keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    # 12 x 12 x 128 -> 3 x 3 x 256
    modelCNN.add(keras.layers.Flatten())
    modelCNN.add(keras.layers.Dense(784, activation='relu'))
    modelCNN.add(keras.layers.Dense(49, activation='relu'))
    modelCNN.add(keras.layers.Dense(1, activation='linear'))
    modelCNN.compile(optimizer=keras.optimizers.Adam(learning_rate=initial_learning_rate), loss='mean_squared_error', metrics=['mae'])
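Note that lr_callback and early_stopping are defined above but never passed to training. A minimal usage sketch, where train_ds and val_ds are hypothetical placeholder names for your training and validation datasets:

history = modelCNN.fit(
    train_ds,  # hypothetical dataset name, not from the original post
    validation_data=val_ds,  # hypothetical dataset name
    epochs=100,
    callbacks=[lr_callback, early_stopping],
)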
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
This issue was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further.