hyperas
hyperas copied to clipboard
Optimizing LSTM with embedding matrix - TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple
First of all thank you very much for your work,
I'm trying to use your framework to optimize hiperparameters in my LSTM network in order to implement a sentiment analysis classifier.
I used some snippet you posted but I cannot make it work. I think the main issue is how to calculate embedding_matrix (I'm using word embedings) to train the network. I trained separately tokenize to get weights file.
I'm getting the following error:
Hyperas search space:
def get_space(): return { 'Dropout': hp.uniform('Dropout', 0, 1), 'optimizer': hp.choice('optimizer', ['rmsprop', 'adam', 'sgd']), }
Traceback (most recent call last):
File "optim_keras.py", line 132, in
Thank you in advance for your help
Here's my code
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
def get_data():
import pickle
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.datasets import imdb
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
import classes.filtros as NT
import classes.data_processing as DP
import classes.embeddings as EB
import classes.model as M
import classes.token as T
import classes.parameters as Params
import pandas as pd
import numpy as np
import sys
import time
# Carga de datos
print('Loading data...')
#instanciamos objetos necesarios
text_array = NT.NormalizeText()
data_processing = DP.DataProcessing()
word_embedding = EB.ProcessEmbeddings()
w2v = word_embedding.get_word2vec(Params.W2V_FILE)
data_set = data_processing.load_data()
# Separacion de los valores para tener un training set equilibrado
neutros = [row for row in data_set if 0 == row[2]]
positiv = [row for row in data_set if 1 == row[2]]
negativ = [row for row in data_set if -1 == row[2]]
df_neutros = pd.DataFrame.from_records(neutros)
df_positiv = pd.DataFrame.from_records(positiv)
df_negativ = pd.DataFrame.from_records(negativ)
minimo = np.min([len(df_neutros),len(df_positiv),len(df_negativ)])
df_final = pd.concat([df_neutros[:minimo], df_positiv[:minimo], df_negativ[:minimo]], ignore_index=True)
# Cargamos el tokenizer aprendido
token_path = './models/Tokenizer.pkl'
t_m = T.TokenizerModel()
with open(token_path, 'rb') as f:
t_m.t = pickle.load(f)
# # Procesado del texto y generación del token list
filtered = pd.DataFrame(columns=['textos'])
for row in df_final.itertuples():
texto_filt = word_embedding.clean_text(row._2)
filtered.loc[row.Index] = texto_filt
encoded_docs = t_m.t.texts_to_sequences(filtered['textos'])
# Codificamos los Documentos de entrada
X = pad_sequences(encoded_docs, maxlen=Params.MAX_SEQUENCE_LENGTH, padding='post')
y = df_final[2]
# # Separamos en train y test
sss = StratifiedShuffleSplit(n_splits=15,test_size=0.15)
for train_index, test_index in sss.split(X, y):
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = y[train_index], y[test_index]
# # Debemos cambiar a categorical las etiquetas dada la función de pérdida que usamos en el entrenamiento
y_train_bin = to_categorical(Y_train, num_classes=3, dtype='int32')
y_test_bin = to_categorical(Y_test, num_classes=3, dtype='int32')
return X_train,y_train_bin,X_test,y_test_bin
def keras_model(X_train,y_train_bin,X_test,y_test_bin):
import pickle
# Definición del modelo y entrenamiento
word_embedding = EB.ProcessEmbeddings()
w2v = word_embedding.get_word2vec(Params.W2V_FILE)
text_array = NT.NormalizeText()
# Cargamos el tokenizer aprendido
token_path = './models/Tokenizer.pkl'
t_m = T.TokenizerModel()
with open(token_path, 'rb') as f:
t_m.t = pickle.load(f)
# Generación de la embedding matrix (vocab_size, t, w2v, text_array)
embedding_matrix = word_embedding.Generate_Matrix(Params.MAX_NB_WORDS, t_m.t, w2v,text_array)
print('Build model...')
# Modelo LSTM
model = Sequential()
model.add(Embedding(Params.MAX_NB_WORDS, output_dim=Params.EMBEDDING_DIM, input_length=Params.MAX_SEQUENCE_LENGTH, weights=[embedding_matrix], trainable=False))
model.add(Bidirectional(LSTM(Params.LSTM_UNITS_1ST, return_sequences=False)))
model.add(Dropout({{uniform(0, 1)}}))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer={{choice(['rmsprop', 'adam', 'sgd'])}}, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.train(X_train, y_train_bin, X_test, y_test_bin)
early_stopping = EarlyStopping(monitor='val_loss', patience=4)
checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
verbose=1,
save_best_only=True)
hist = model.fit(X_train, y_train_bin,
nb_epoch=1,
validation_split=0.08,
show_accuracy=True,
callbacks=[early_stopping, checkpointer])
score, acc = model.evaluate(X_test, y_test_bin, show_accuracy=True, verbose=0)
print('Test accuracy:', acc)
return {'loss': -acc, 'status': STATUS_OK}
if __name__ == '__main__':
best_run = optim.minimize(model=keras_model,data=get_data(),algo=tpe.suggest,max_evals=10,trials=Trials())
print(best_run)
Did you figure it out? I'm getting a similar error