liac-arff
liac-arff copied to clipboard
Issue: "BadLayout: Invalid layout of the ARFF file, at line 3"
# Handle on the raw ARFF dataset, opened for reading as soon as the script
# starts. NOTE(review): it appears to be consumed by scrape_data() and is not
# closed anywhere in the visible code -- confirm that is intentional.
file = open("data/final-dataset.arff", 'r')
# Togglable Options
# Module-level boolean switches for the script. NOTE(review): the intended
# meaning of each flag is inferred from its name only -- confirm against the
# code that consumes them.
regenerate_model = False
regenerate_data = False
generate_graphs = True
save_model = True
create_model_image = False
def generate_model(shape):
    """Build the (uncompiled) feed-forward classifier.

    The network is three ReLU hidden layers (30, 10 and 10 units), each
    followed by 40% dropout, and a 5-way softmax output layer.

    Args:
        shape: Number of input features per sample (passed as ``input_dim``
            of the first layer).

    Returns:
        The assembled ``Sequential`` model; the caller is responsible for
        compiling it.
    """
    # define the model
    model = Sequential()
    model.add(Dense(30, input_dim=shape, kernel_initializer='uniform', activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(5, activation='softmax'))
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()
    return model
def scrape_data():
    """Decode the ARFF dataset, split it 90/10 and cache the arrays on disk.

    Reads the module-level ``file`` handle, separates features from the
    class label (last column of every row), collapses every non-zero label
    to 1, splits the rows in file order into training (first 90%) and
    validation (remaining 10%) sets, one-hot encodes the split labels with
    5 columns, and writes everything under ``saved-files/`` as ``.npy``.

    Returns:
        None. The results are the files written to ``saved-files/``.
    """
    # Rewind the shared handle so that calling this function more than once
    # does not decode an already-exhausted stream.
    file.seek(0)

    # decode the .arff data and change text labels into numerical
    decoder = arff.ArffDecoder()
    data = decoder.decode(file, encode_nominal=True)

    # split the raw data into data and labels
    rows = data['data']
    vals = [row[:-1] for row in rows]
    # Collapse every non-zero class to 1. The previous loop indexed `labels`
    # by label *value* instead of by position, so it only ever touched the
    # first few entries.
    # NOTE(review): the labels are still one-hot encoded with 5 columns
    # below to match the model's 5-unit output layer -- confirm that a
    # binary target is really what is wanted.
    labels = [0 if row[-1] == 0 else 1 for row in rows]

    # split the labels and data into training and validation sets
    split_at = int(.9 * len(vals))
    training_data = vals[:split_at]
    training_labels = labels[:split_at]
    validation_data = vals[split_at:]
    validation_labels = labels[split_at:]
    print(training_labels)

    # flatten labels with one hot encoding
    training_labels = to_categorical(training_labels, 5)
    validation_labels = to_categorical(validation_labels, 5)

    # save all arrays with numpy (make sure the target directory exists)
    os.makedirs('saved-files', exist_ok=True)
    np.save('saved-files/vals', np.asarray(vals))
    np.save('saved-files/labels', np.asarray(labels))
    np.save('saved-files/training_data', np.asarray(training_data))
    np.save('saved-files/validation_data', np.asarray(validation_data))
    np.save('saved-files/training_labels', np.asarray(training_labels))
    np.save('saved-files/validation_labels', np.asarray(validation_labels))
# Check whether the cached arrays exist; if any is missing (or regeneration
# was requested through `regenerate_data`), rebuild them from the ARFF file.
required_files = (
    'saved-files/training_data.npy',
    'saved-files/training_labels.npy',
    'saved-files/validation_data.npy',
    'saved-files/validation_labels.npy',
)
if regenerate_data or not all(os.path.exists(path) for path in required_files):
    print('creating')
    if not os.path.exists('saved-files'):
        os.mkdir('saved-files')
    # Decode exactly once: a second unconditional call would re-read the
    # same, already consumed, file handle.
    scrape_data()

# load the saved data
data_train = np.load('saved-files/training_data.npy')
label_train = np.load('saved-files/training_labels.npy')
data_eval = np.load('saved-files/validation_data.npy')
label_eval = np.load('saved-files/validation_labels.npy')

model_path = 'saved-files/model.h5'
history = None  # stays None when a previously saved model is reused

if regenerate_model or not os.path.exists(model_path):
    # generate and compile the model
    model = generate_model(len(data_train[0]))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # initialize tensorboard
    tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)

    # only using 2 epochs, otherwise the model would overfit to the data
    history = model.fit(data_train, label_train, validation_data=(data_eval, label_eval),
                        epochs=2, callbacks=[tensorboard])
    loss_history = history.history["loss"]
    numpy_loss_history = np.array(loss_history)
    np.savetxt("saved-files/loss_history.txt", numpy_loss_history, delimiter=",")

    # save the model for later so no retraining is needed; done right after
    # training so a blocking plot window cannot prevent it
    if save_model:
        model.save(model_path)
else:
    # Reuse the stored model instead of overwriting a freshly trained one.
    model = load_model(model_path)

# evaluating the model's performance
print(model.evaluate(data_eval, label_eval))
print(model.evaluate(data_train, label_train))

# if create_model_image: plot_model(model, to_file='model.png', show_shapes=True)

# The curves need a training history, which only exists when the model was
# trained during this run.
if generate_graphs and history is not None:
    metrics = history.history
    # Keras versions differ in the accuracy key name: 'acc' vs 'accuracy'.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    plt.figure(1)

    # summarize history for accuracy
    plt.subplot(211)
    plt.plot(metrics[acc_key])
    plt.plot(metrics['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # summarize history for loss
    plt.subplot(212)
    plt.plot(metrics['loss'])
    plt.plot(metrics['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# play sound when done with code to alert me (macOS `afplay`; on other
# platforms the command simply fails with a non-zero exit status)
os.system('afplay /System/Library/Sounds/Ping.aiff')
os.system('afplay /System/Library/Sounds/Ping.aiff')
Please provide the ARFF input file and explain why you think it is valid.