keras LSTM Model and FGSM Attack Implementation in TensorFlow: Shape Mismatch Errors

I'm working on a project to implement an FGSM (Fast Gradient Sign Method) attack on an LSTM model for stock price prediction using TensorFlow and Keras. Despite my efforts, I'm encountering persistent issues with tensor shape mismatches, and I'm hoping to get some advice from this community.

I’ve created an LSTM model to predict stock prices and tried generating adversarial examples using the FGSM method. The training seems to proceed without issues, but when I attempt to generate adversarial examples and evaluate the model with this data, I encounter the following error: Exception encountered when calling Sequential.call(). Cannot take the length of shape with unknown rank.

Arguments received by Sequential.call(): • inputs=tf.Tensor(shape=, dtype=float32) • training=False • mask=None ValueError: Cannot take the length of shape with unknown rank. During handling of the above exception, another exception occurred: test_predictions = model.predict(adversarial_test_data)

ValueError: Exception encountered when calling Sequential.call().

Here’s a summary of the relevant sections of my code:

import numpy as np
import pandas as pd
import os
import tensorflow as tf
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Input

def fgsm(input_data, model, loss, epsilon=0.01):

    """
    Generate adversarial examples using FGSM.
    :param input_data: Input data to be perturbed
    :param model: Trained model
    :param epsilon: Perturbation size
    """
    input_data = tf.convert_to_tensor(input_data, dtype=tf.float32)
    loss = tf.convert_to_tensor(loss, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(input_data)
        prediction = model(input_data)
        loss = tf.keras.losses.MeanSquaredError()(loss, prediction)
    
    gradient = tape.gradient(loss, input_data)
    adversarial_example = input_data + epsilon * tf.sign(gradient)

    return adversarial_example.numpy()

## Read each stock datasets as dataframe ##
rootpath = os.path.dirname(__file__)
print (rootpath)
folderpath = os.path.join(rootpath, './Stocks')
for file_name in os.listdir(folderpath):
    if file_name.endswith('.csv'):
        df = pd.read_csv(os.path.join(folderpath,file_name),delimiter=',',usecols=['Date','Open','High','Low','Close','Adj Close','Volume'])
        basename, ext = os.path.splitext(file_name)
        df['Date'] = pd.to_datetime(df['Date'])  ## Convert 'Date' column to datetime format
       print ('file:', file_name)
    data = df.filter(['Close'])
    
    # Convert the dataframe to a numpy array
    dataset = data.values

    # Get the number of rows to train the model on
    training_data_len = int(np.ceil(len(dataset)* 0.8))

    scaler = MinMaxScaler(feature_range=(0,1))
    scale_data = scaler.fit_transform(dataset)

    ## Create the scaled training data set ##
    train_data = scale_data[0:int(training_data_len), :]

    ## Split the data into train data and test data sets ##
    x_train = []
    y_train = []

    for i in range(50, len(train_data)):
        x_train.append(train_data[i-50:i, 0])
        y_train.append(train_data[i,0])
        if i < 50:   
            print(x_train)
            print(y_train)
            print()

    ## Convert the x_data and y_data to numpy arrays ##
    x_train, y_train = np.array(x_train), np.array(y_train)

    ## Reshape the data
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    y_train = np.reshape(y_train, (y_train.shape[0], 1))
    
    ## Build the LSTM model ##
    model = Sequential()
    #model.add(LSTM(128, return_sequences=True, input_shape=(x_train.shape[1],1)))
    model.add(Input(shape=(x_train.shape[1], 1)))
    model.add(LSTM(128, return_sequences=True))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(25))
    model.add(Dense(1))

    ## Compile the model ##
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    ## Train the model ##
    model.fit(x_train, y_train, batch_size=32, epochs=1,validation_split=0.2) 

    ## Create the testing dataset ##
    test_data = scale_data[training_data_len - 50: , :]

    ## Create the datasets x_test
    x_test = []
    y_test = dataset[training_data_len: , :]
    for i in range (50, len(test_data)):
        x_test.append(test_data[i-50:i, 0])

    x_test = np.array(x_test)
    y_test = np.array(y_test)

    ## Reshape the data ##
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1],1))
    y_test = np.reshape(y_test, (y_test.shape[0], 1))


    ## Generate adversarial examples for the training data ##
    adversarial_train_data = fgsm(x_train, model, y_train, epsilon=0.01)
    
    #Generate adversarial examples for the test data
    adversarial_test_data = fgsm(x_test, model, y_test, epsilon=0.01)
    
    # Ensure that the adversarial data has the correct shape
    adversarial_train_data = np.reshape(adversarial_train_data, (adversarial_train_data.shape[0], adversarial_train_data.shape[1], 1))
    adversarial_test_data = np.reshape(adversarial_test_data, (adversarial_test_data.shape[0], adversarial_test_data.shape[1], 1))
   
    ## Save adversarial examples to CSV files
    adversarial_train_data = pd.DataFrame(adversarial_train_data.reshape(adversarial_train_data.shape[0], -1))
    adversarial_train_data.to_csv('adversarial_train_data.csv', index=False)
    
    ## Evaluate the model with adversarial training data
    train_predictions = model.predict(adversarial_train_data)
    train_rmse = np.sqrt(np.mean((train_predictions - y_train) ** 2))
    print (f'Training RMSE with adversarial examples: {train_rmse}')

    ## Evaluate the model with adversarial test data
    test_predictions = model.predict(adversarial_test_data)
    test_rmse = np.sqrt(np.mean((test_predictions - y_test) ** 2))
    print (f'Test RMSE with adversarial examples: {test_rmse}')

How to properly handle tensor shapes in TensorFlow/Keras to avoid shape mismatch errors. Any insights into why the Sequential.call() method might be encountering an unknown rank shape error.

Any help or pointers would be greatly appreciated. Thank you in advance!

Aug 07 '24 00:08 Headshake

Hi @Headshake -

Can you provide dummy data dataset to reproduce the issue ?

Aug 07 '24 07:08 mehtamansi29

Here is the testing dataset: Stocks.zip

Aug 07 '24 14:08 Headshake

Hi @Headshake -

Here in your code Generate adversarial examples train data and test data are correct. So no need to correct shape of adversarial generated data. Removing this two line from the code will remove error and will Evaluate the model with adversarial train and test data.

adversarial_train_data = np.reshape(adversarial_train_data, (adversarial_train_data.shape[0], adversarial_train_data.shape[1], 1))
adversarial_test_data = np.reshape(adversarial_test_data, (adversarial_test_data.shape[0], adversarial_test_data.shape[1], 1))

Attached gist for the reference as well. Thanks..!!

Aug 08 '24 06:08 mehtamansi29

Hi @mehtamansi29,

Thanks for your response. I've tried the suggested solution, but I still get the same error message.

Aug 09 '24 11:08 Headshake

Hi @Headshake -

Which keras version are you using ?

Aug 09 '24 11:08 mehtamansi29

I'm using Keras 3.3.3

Aug 09 '24 12:08 Headshake

Can you try to upgrade keras with latest version using pip install --upgrade keras and try to run code with my suggestion?

Aug 09 '24 12:08 mehtamansi29

Update: I upgraded the keras model to 3.4.1, it's the same issue when I re-run the code.

Aug 09 '24 13:08 Headshake

Are pandas dataframes accepted as inputs?

The error happens in predict where you feed a dataframe instead of a numpy array.

Aug 09 '24 13:08 newresu

Yes, I read inputs as dataframes and convert them to numpy array before passing them to the function using these lines: x_train, y_train = np.array(x_train), np.array(y_train)

x_test = np.array(x_test) y_test = np.array(y_test)

Aug 09 '24 14:08 Headshake

Hi @Headshake -

This code will work successful on evaluating the model with adversarial training and testing data with keras(3.4.1) and tensorflow(2.17.0).

import numpy as np
import pandas as pd
import os
import tensorflow as tf
print(tf.__version__)
import keras
print(keras.__version__)
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Input

def fgsm(input_data, model, loss, epsilon=0.01):

    """
    Generate adversarial examples using FGSM.
    :param input_data: Input data to be perturbed
    :param model: Trained model
    :param epsilon: Perturbation size
    """
    input_data = tf.convert_to_tensor(input_data, dtype=tf.float32)
    loss = tf.convert_to_tensor(loss, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(input_data)
        prediction = model(input_data)
        loss = tf.keras.losses.MeanSquaredError()(loss, prediction)

    gradient = tape.gradient(loss, input_data)
    adversarial_example = input_data + epsilon * tf.sign(gradient)

    return adversarial_example.numpy()

df = pd.read_csv('/content/AAPL.csv')
df['Date'] = pd.to_datetime(df['Date'])  ## Convert 'Date' column to datetime format

data = df.filter(['Close'])

# Convert the dataframe to a numpy array
dataset = data.values

# Get the number of rows to train the model on
training_data_len = int(np.ceil(len(dataset)* 0.8))

scaler = MinMaxScaler(feature_range=(0,1))
scale_data = scaler.fit_transform(dataset)

## Create the scaled training data set ##
train_data = scale_data[0:int(training_data_len), :]

## Split the data into train data and test data sets ##
x_train = []
y_train = []

for i in range(50, len(train_data)):
    x_train.append(train_data[i-50:i, 0])
    y_train.append(train_data[i,0])
    if i < 50:
        print(x_train)
        print(y_train)
        print()

## Convert the x_data and y_data to numpy arrays ##
x_train, y_train = np.array(x_train), np.array(y_train)

## Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], 1))

## Build the LSTM model ##
model = Sequential()
#model.add(LSTM(128, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(Input(shape=(x_train.shape[1], 1)))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25))
model.add(Dense(1))

## Compile the model ##
model.compile(optimizer='adam', loss='mean_squared_error')

## Train the model ##
print(f'Training Model...')
model.fit(x_train, y_train, batch_size=32, epochs=1,validation_split=0.2)

## Create the testing dataset ##
test_data = scale_data[training_data_len - 50: , :]

## Create the datasets x_test
x_test = []
y_test = dataset[training_data_len: , :]
for i in range (50, len(test_data)):
    x_test.append(test_data[i-50:i, 0])

x_test = np.array(x_test)
y_test = np.array(y_test)

## Reshape the data ##
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1],1))
y_test = np.reshape(y_test, (y_test.shape[0], 1))

## Generate adversarial examples for the training data ##
adversarial_train_data = fgsm(x_train, model, y_train, epsilon=0.01)

#Generate adversarial examples for the test data
adversarial_test_data = fgsm(x_test, model, y_test, epsilon=0.01)

print(adversarial_train_data.shape)
print(adversarial_test_data.shape)

## Evaluate the model with adversarial training data
print(f'Evaluate the model with adversarial training data...')
train_predictions = model.predict(adversarial_train_data)
train_rmse = np.sqrt(np.mean((train_predictions - y_train) ** 2))
print (f'Training RMSE with adversarial examples: {train_rmse}')

## Evaluate the model with adversarial test data
print(f'Evaluate the model with adversarial test data...')
test_predictions = model.predict(adversarial_test_data)
test_rmse = np.sqrt(np.mean((test_predictions - y_test) ** 2))
print (f'Test RMSE with adversarial examples: {test_rmse}')

Aug 09 '24 16:08 mehtamansi29

Yes, I can run the code above successfully, but only when the codes below are removed. In order to write the adversarial data into the file, I need to reshape the data.

adversarial_train_data = pd.DataFrame(adversarial_train_data.reshape(adversarial_train_data.shape[0], -1))
adversarial_train_data.to_csv('adversarial_train_data.csv', index=False)

Aug 13 '24 08:08 Headshake

Hi @Headshake -

Could you please confirm if this issue is resolved for you ? Please feel free to close the issue if it is resolved!