liac-arff icon indicating copy to clipboard operation
liac-arff copied to clipboard

Issue: "BadLayout: Invalid layout of the ARFF file, at line 3"

Open Malhabib opened this issue 4 years ago • 1 comment

# Module-level handle on the ARFF dataset, opened in text mode.
# NOTE(review): nothing in the code shown here closes this handle; it appears
# to be the object scrape_data() passes to the ARFF decoder.
file = open("data/final-dataset.arff", 'r')

# Togglable Options
# NOTE(review): none of the flags below is read by any executable statement
# shown here (create_model_image only appears in a commented-out line), so
# their effect, if any, must come from code outside this view.

regenerate_model = False

regenerate_data = False

generate_graphs = True

save_model = True

create_model_image = False

def generate_model(shape):
    """Build the feed-forward classifier and print its layer summary.

    The network is three ReLU Dense layers (30, 10, 10 units), each followed
    by Dropout(0.4), and a 5-unit softmax output layer. The model is returned
    uncompiled; the caller is responsible for calling ``compile``.

    Args:
        shape: Number of input features, passed as ``input_dim`` to the
            first Dense layer.

    Returns:
        The constructed ``Sequential`` model.
    """
    # define the model
    model = Sequential()

    model.add(Dense(30, input_dim=shape, kernel_initializer='uniform', activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    # model.add(Dense(64, activation='relu'))
    # model.add(Dropout(0.4))
    model.add(Dense(5, activation='softmax'))

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    return model

def scrape_data():
    """Decode the ARFF dataset, split it 90/10 and cache the arrays on disk.

    Reads from the module-level ``file`` handle. The last column of every row
    is treated as the label and all preceding columns as features. Labels are
    collapsed to 0 / 1 (any non-zero class becomes 1), then one-hot encoded
    with 5 columns. All arrays are written under ``saved-files/`` with
    ``np.save``; the directory must already exist.

    Returns:
        None. The results are only written to disk.
    """
    # Rewind first so that calling this function more than once re-reads the
    # whole file instead of decoding an already exhausted handle.
    file.seek(0)

    # decode the .arff data and change text labels into numerical
    decoder = arff.ArffDecoder()
    data = decoder.decode(file, encode_nominal=True)

    # split the raw data into data and labels
    rows = data['data']
    vals = [row[:-1] for row in rows]

    # Collapse every non-zero class into 1. The previous loop used each label
    # *value* as an index into the list, so it only touched the first few
    # entries (and could raise IndexError on a short dataset).
    labels = [0 if row[-1] == 0 else 1 for row in rows]

    # split the labels and data into training and validation sets
    split_at = int(.9 * len(vals))
    training_data = vals[:split_at]
    training_labels = labels[:split_at]
    validation_data = vals[split_at:]
    validation_labels = labels[split_at:]

    print(training_labels)

    # flatten labels with one hot encoding
    training_labels = to_categorical(training_labels, 5)
    validation_labels = to_categorical(validation_labels, 5)

    # save all arrays with numpy
    np.save('saved-files/vals', np.asarray(vals))
    np.save('saved-files/labels', np.asarray(labels))
    np.save('saved-files/training_data', np.asarray(training_data))
    np.save('saved-files/validation_data', np.asarray(validation_data))
    np.save('saved-files/training_labels', np.asarray(training_labels))
    np.save('saved-files/validation_labels', np.asarray(validation_labels))

# check to see if saved data exists, if not then create the data
required_files = (
    'saved-files/training_data.npy',
    'saved-files/training_labels.npy',
    'saved-files/validation_data.npy',
    'saved-files/validation_labels.npy',
)

# Rebuild the cache when any array is missing or when regeneration is forced
# through the regenerate_data option. The data is scraped exactly once; the
# former trailing unconditional scrape_data() call re-ran the decode on every
# start, which defeated this check.
if regenerate_data or not all(os.path.exists(path) for path in required_files):
    print('creating')

    # makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair.
    os.makedirs('saved-files', exist_ok=True)

    scrape_data()

# load the saved data
data_train = np.load('saved-files/training_data.npy')
label_train = np.load('saved-files/training_labels.npy')
data_eval = np.load('saved-files/validation_data.npy')
label_eval = np.load('saved-files/validation_labels.npy')

# generate and compile the model
model = generate_model(len(data_train[0]))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# initialize tensorboard
tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)

# only using 2 epochs otherwise the model would overfit to the data
history = model.fit(data_train, label_train, validation_data=(data_eval, label_eval), epochs=2,
                    callbacks=[tensorboard])
loss_history = history.history["loss"]

numpy_loss_history = np.array(loss_history)
np.savetxt("saved-files/loss_history.txt", numpy_loss_history, delimiter=",")

# Reuse a previously saved model only when one exists and regeneration was not
# requested. Loading unconditionally crashed on a first run (no model.h5 yet)
# and always threw away the model that was just trained.
# NOTE(review): training above still runs even when the saved model is reused,
# because `history` is needed later for the plots - confirm this is intended.
if not regenerate_model and os.path.exists('saved-files/model.h5'):
    model = load_model('saved-files/model.h5')

# evaluating the model's performance on the validation set, then the training set
print(model.evaluate(data_eval, label_eval))
print(model.evaluate(data_train, label_train))

# if create_model_image: plot_model(model, to_file='model.png', show_shapes=True)

# Plot training curves only when the generate_graphs option is enabled.
if generate_graphs:
    plt.figure(1)

    # Depending on the Keras version the accuracy metric is recorded as
    # 'acc' or as 'accuracy'; pick whichever key this run produced.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'

    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# save the model for later so no retraining is needed
# (skipped when the save_model option is switched off)
if save_model:
    model.save('saved-files/model.h5')

# play sound when done with code to alert me
# (afplay is a macOS command; the sound is played twice)
os.system('afplay /System/Library/Sounds/Ping.aiff')
os.system('afplay /System/Library/Sounds/Ping.aiff')

Malhabib avatar Jul 13 '20 23:07 Malhabib

Please provide the arff input and explain why you think it is valid.

jnothman avatar Jul 13 '20 23:07 jnothman