keras-monotonic-attention

Unable to use with real data

Open Bashima opened this issue 5 years ago • 1 comment

I am trying to use this attention code (monotonic attention) for Spanish-to-English translation (http://download.tensorflow.org/data/spa-eng.zip), but I am getting this error:

Input 0 is incompatible with layer AttentionDecoder: expected ndim=3, found ndim=2

I would really appreciate it if you could help me out. I am quite new to RNNs and Keras.

The following is the code I am using:

```python
# Imports inferred from the rest of the snippet; they were not shown in the original post.
import os
import re
import unicodedata

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# AttentionDecoder from this repository (adjust the import path to wherever you keep it)
from attention_decoder import AttentionDecoder

path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', 
    extract=True)

path_to_file = os.path.dirname(path_to_zip)+"/spa-eng/spa.txt"

def unicode_to_ascii(s):
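    # Decompose accented characters and drop the combining marks (e.g. é -> e).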
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
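    # Lowercase, put spaces around punctuation, drop other non-letter characters,
    # and wrap the sentence in <start>/<end> tokens.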
    w = unicode_to_ascii(w.lower().strip())
    
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)

    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    
    w = w.rstrip().strip()
    
    w = '<start> ' + w + ' <end>'
    return w

def create_dataset(path, num_examples):
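    # Read tab-separated sentence pairs and preprocess both sides of each pair.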
    lines = open(path, encoding='UTF-8').read().strip().split('\n')
    
    word_pairs = [[preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]
    
    return word_pairs

class LanguageIndex():
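  # Word<->index lookups for one language; index 0 is reserved for <pad>.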
  def __init__(self, lang):
    self.lang = lang
    self.word2idx = {}
    self.idx2word = {}
    self.vocab = set()
    
    self.create_index()
    
  def create_index(self):
    for phrase in self.lang:
      self.vocab.update(phrase.split(' '))
    
    self.vocab = sorted(self.vocab)
    
    self.word2idx['<pad>'] = 0
    for index, word in enumerate(self.vocab):
      self.word2idx[word] = index + 1
    
    for word, index in self.word2idx.items():
      self.idx2word[index] = word
    
def max_length(tensor):
    return max(len(t) for t in tensor)


def load_dataset(path, num_examples):
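    # Convert both languages to integer sequences, then pad them to the max length.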
    pairs = create_dataset(path, num_examples)
    inp_lang = LanguageIndex(sp for en, sp in pairs)
    targ_lang = LanguageIndex(en for en, sp in pairs)
    input_tensor = [[inp_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]
    target_tensor = [[targ_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]

    max_length_inp, max_length_tar = max_length(input_tensor), max_length(target_tensor)
    input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor, 
                                                                 maxlen=max_length_inp,
                                                                 padding='post')
    
    target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor, 
                                                                  maxlen=max_length_tar, 
                                                                  padding='post')
    
    return input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_tar

num_examples = 30000
input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_targ = load_dataset(path_to_file, num_examples)

input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val)

BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
embedding_dim = 256
units = 1024  # dimensionality of the recurrent layers' output
vocab_inp_size = len(inp_lang.word2idx)
vocab_tar_size = len(targ_lang.word2idx)

print(vocab_inp_size, vocab_tar_size)

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(BATCH_SIZE))


def gru(units):
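    # Use the cuDNN-accelerated GRU when a GPU is available, otherwise a standard GRU.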
    if tf.test.is_gpu_available():
        print("GPU")
        return tf.keras.layers.CuDNNGRU(units, 
                                        return_sequences=True, 
                                        return_state=True,
                                        recurrent_initializer='glorot_uniform')
    else:
        print("NO GPU")
        return tf.keras.layers.GRU(units, 
                               return_sequences=True, 
                               return_state=True, 
                               recurrent_activation='sigmoid', 
                               recurrent_initializer='glorot_uniform')

class Encoder(tf.keras.Model):
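    # Embedding followed by a single GRU; call() returns the full output sequence and the final state.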
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = gru(self.enc_units)
        
    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)        
        return output, state
    
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

EPOCHS = 10
inputs = Input(shape=(None,), dtype='int64')
outp_true = Input(shape=(None,), dtype='int64')
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
attention_decoder = AttentionDecoder(units=units, alphabet_size=vocab_tar_size,
                                     embedding_dim=embedding_dim,
                                     is_monotonic=True,
                                     normalize_energy=True)

output = attention_decoder([inputs, outp_true])
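# ^ `inputs` has shape (batch, time), i.e. ndim=2, which matches the
#   "expected ndim=3, found ndim=2" error quoted above.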

model = Model(inputs=[inputs, outp_true], outputs=[output])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'])
model.summary()

dec_input = tf.expand_dims([targ_lang.word2idx['<start>']] * BATCH_SIZE, 1)  
hidden = encoder.initialize_hidden_state()
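# Encode each batch, then fit the attention model on the encoder's final hidden state.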
for epoch in range(EPOCHS):
    for (batch, (inp, targ)) in enumerate (dataset):
        print(batch)
        enc_output, enc_hidden = encoder(inp, hidden)
        print(enc_hidden.shape)
        model.fit([enc_hidden, targ], targ,
                  epochs=1, batch_size=1,
                  validation_data=([input_tensor_val, np.squeeze(target_tensor_val, axis=-1)],
                                   target_tensor_val))
```

Bashima avatar May 31 '19 17:05 Bashima

If you need 3 dimensions instead of 2, you can add a new dimension of size 1. For example, if your array has shape (m, n), you can reshape it to (m, n, 1).
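
A minimal sketch of that with NumPy (the shapes here are just placeholders):

```python
import numpy as np

x = np.zeros((64, 1024))          # 2-D array of shape (m, n)
x3 = np.expand_dims(x, axis=-1)   # same data, now shape (m, n, 1)
# equivalently: x3 = x.reshape(x.shape[0], x.shape[1], 1)
print(x3.shape)                   # (64, 1024, 1)
```

Inside the Keras graph the same thing can be done with tf.expand_dims(x, axis=-1) or a Reshape layer.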

Victor-Almeida avatar Oct 02 '19 05:10 Victor-Almeida