hyperas Optimizing LSTM with embedding matrix - TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple

Optimizing LSTM with embedding matrix - TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple

Open jlperezg opened this issue 5 years ago • 1 comments

First of all thank you very much for your work,

I'm trying to use your framework to optimize hiperparameters in my LSTM network in order to implement a sentiment analysis classifier.

I used some snippet you posted but I cannot make it work. I think the main issue is how to calculate embedding_matrix (I'm using word embedings) to train the network. I trained separately tokenize to get weights file.

I'm getting the following error:

Hyperas search space:

def get_space(): return { 'Dropout': hp.uniform('Dropout', 0, 1), 'optimizer': hp.choice('optimizer', ['rmsprop', 'adam', 'sgd']), }

Traceback (most recent call last): File "optim_keras.py", line 132, in best_run = optim.minimize(model=keras_model,data=get_data(),algo=tpe.suggest,max_evals=10,trials=Trials()) File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 69, in minimize keep_temp=keep_temp) File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 98, in base_minimizer model_str = get_hyperopt_model_string(model, data, functions, notebook_name, verbose, stack) File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 198, in get_hyperopt_model_string data_string = retrieve_data_string(data, verbose) File "/usr/bin/anaconda3/lib/python3.7/site-packages/hyperas/optim.py", line 219, in retrieve_data_string data_string = inspect.getsource(data) File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 973, in getsource lines, lnum = getsourcelines(object) File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 955, in getsourcelines lines, lnum = findsource(object) File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 768, in findsource file = getsourcefile(object) File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 684, in getsourcefile filename = getfile(object) File "/usr/bin/anaconda3/lib/python3.7/inspect.py", line 666, in getfile type(object).name)) TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple

Thank you in advance for your help

Here's my code


from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform



def get_data():
  import pickle
  from keras.preprocessing import sequence
  from keras.models import Sequential
  from keras.layers.core import Dense, Dropout, Activation
  from keras.layers.embeddings import Embedding
  from keras.layers.recurrent import LSTM
  from keras.datasets import imdb
  from keras.callbacks import EarlyStopping, ModelCheckpoint
  from keras.preprocessing.sequence import pad_sequences
  from keras.utils import to_categorical
  from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
  import classes.filtros as NT
  import classes.data_processing as DP
  import classes.embeddings as EB
  import classes.model as M
  import classes.token as T
  import classes.parameters as Params
  import pandas as pd
  import numpy as np
  import sys
  import time 

  # Carga de datos 
  print('Loading data...')
  #instanciamos objetos necesarios
  text_array = NT.NormalizeText()
  data_processing = DP.DataProcessing()
  word_embedding = EB.ProcessEmbeddings()
  w2v = word_embedding.get_word2vec(Params.W2V_FILE)
  data_set = data_processing.load_data()

  # Separacion de los valores para tener un training set equilibrado
  neutros = [row for row in data_set if 0  == row[2]]
  positiv = [row for row in data_set if 1  == row[2]]
  negativ = [row for row in data_set if -1 == row[2]]


  df_neutros = pd.DataFrame.from_records(neutros)
  df_positiv = pd.DataFrame.from_records(positiv)
  df_negativ = pd.DataFrame.from_records(negativ)

  minimo = np.min([len(df_neutros),len(df_positiv),len(df_negativ)])
  df_final = pd.concat([df_neutros[:minimo], df_positiv[:minimo], df_negativ[:minimo]], ignore_index=True)

    # Cargamos el tokenizer aprendido
  token_path = './models/Tokenizer.pkl'
  t_m = T.TokenizerModel()
  with open(token_path, 'rb') as f:
      t_m.t = pickle.load(f)


  # # Procesado del texto y generación del token list
  filtered = pd.DataFrame(columns=['textos'])
  for row in df_final.itertuples():
    texto_filt = word_embedding.clean_text(row._2)
    filtered.loc[row.Index] = texto_filt

  encoded_docs = t_m.t.texts_to_sequences(filtered['textos'])
  
  # Codificamos los Documentos de entrada
  X = pad_sequences(encoded_docs, maxlen=Params.MAX_SEQUENCE_LENGTH, padding='post')
  y = df_final[2]
  # # Separamos en train y test 
  sss = StratifiedShuffleSplit(n_splits=15,test_size=0.15)

  for train_index, test_index in sss.split(X, y):
      X_train, X_test = X[train_index], X[test_index]
      Y_train, Y_test = y[train_index], y[test_index]

  # # Debemos cambiar a categorical las etiquetas dada la función de pérdida que usamos en el entrenamiento
  y_train_bin = to_categorical(Y_train, num_classes=3, dtype='int32')
  y_test_bin = to_categorical(Y_test, num_classes=3, dtype='int32')

  return X_train,y_train_bin,X_test,y_test_bin


def keras_model(X_train,y_train_bin,X_test,y_test_bin):
  import pickle
# Definición del modelo y entrenamiento

  word_embedding = EB.ProcessEmbeddings()
  w2v = word_embedding.get_word2vec(Params.W2V_FILE)
  text_array = NT.NormalizeText()
  # Cargamos el tokenizer aprendido
  token_path = './models/Tokenizer.pkl'
  t_m = T.TokenizerModel()
  with open(token_path, 'rb') as f:
      t_m.t = pickle.load(f)

  # Generación de la embedding matrix (vocab_size, t, w2v, text_array)
  embedding_matrix = word_embedding.Generate_Matrix(Params.MAX_NB_WORDS, t_m.t, w2v,text_array)

  print('Build model...')
  # Modelo LSTM
  model   = Sequential()
  model.add(Embedding(Params.MAX_NB_WORDS, output_dim=Params.EMBEDDING_DIM, input_length=Params.MAX_SEQUENCE_LENGTH, weights=[embedding_matrix], trainable=False))
  model.add(Bidirectional(LSTM(Params.LSTM_UNITS_1ST,  return_sequences=False)))
  model.add(Dropout({{uniform(0, 1)}}))
  model.add(Dense(3, activation='softmax'))

  model.compile(optimizer={{choice(['rmsprop', 'adam', 'sgd'])}}, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
  model.train(X_train, y_train_bin, X_test, y_test_bin)



  early_stopping = EarlyStopping(monitor='val_loss', patience=4)
  checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
                                   verbose=1,
                                   save_best_only=True)

  hist = model.fit(X_train, y_train_bin,
                     nb_epoch=1,
                     validation_split=0.08,
                     show_accuracy=True,
                     callbacks=[early_stopping, checkpointer])

  score, acc = model.evaluate(X_test, y_test_bin, show_accuracy=True, verbose=0)

  print('Test accuracy:', acc)
  return {'loss': -acc, 'status': STATUS_OK}



if __name__ == '__main__':
    best_run = optim.minimize(model=keras_model,data=get_data(),algo=tpe.suggest,max_evals=10,trials=Trials())
    print(best_run)

Nov 27 '19 09:11 jlperezg

Did you figure it out? I'm getting a similar error

Aug 23 '20 15:08 marianyamukuru

hyperas hyperas copied to clipboard

Optimizing LSTM with embedding matrix - TypeError: module, class, method, function, traceback, frame, or code object was expected, got tuple

hyperas
hyperas copied to clipboard