tensorflow-maml
tensorflow-maml copied to clipboard
There might be an error in the `train_maml` function.
The loss used to compute the gradient for the meta-update comes from only one task. According to the original paper, however, it should be the sum of the test losses over all sampled tasks. Please look at step 8 in the code below: `test_loss` is computed inside the loop `for i, t in enumerate(random.sample(dataset, len(dataset)))`, and the meta-update is applied inside that same loop, so each meta-update uses the `test_loss` of a single task only.
# Step 2: instead of checking for convergence, we train for a number
# of epochs
for _ in range(epochs):
total_loss = 0
losses = []
start = time.time()
# Step 3 and 4
for i, t in enumerate(random.sample(dataset, len(dataset))):
**x, y = np_to_tensor(t.batch())**
model.forward(x) # run forward pass to initialize weights
with tf.GradientTape() as test_tape:
# test_tape.watch(model.trainable_variables)
# Step 5
with tf.GradientTape() as train_tape:
train_loss, _ = compute_loss(model, x, y)
# Step 6
gradients = train_tape.gradient(train_loss, model.trainable_variables)
k = 0
model_copy = copy_model(model, x)
for j in range(len(model_copy.layers)):
model_copy.layers[j].kernel = tf.subtract(model.layers[j].kernel,
tf.multiply(lr_inner, gradients[k]))
model_copy.layers[j].bias = tf.subtract(model.layers[j].bias,
tf.multiply(lr_inner, gradients[k+1]))
k += 2
# Step 8
**test_loss**, logits = compute_loss(model_copy, **x, y**)
# Step 8
gradients = test_tape.gradient(**test_loss**, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
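Here is my attempt to adapt the code. Now ten tasks are evaluated in each step, and their test losses are combined (summed, then divided by the number of tasks) before the meta-update is applied: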
def train_maml(model, epochs, dataset, lr_inner=0.01, batch_size=1, log_steps=100, num_tasks=10): #MOD
    """Train `model` with MAML, applying one meta-update per batch of tasks.

    For every meta-step, `num_tasks` tasks are drawn (without replacement)
    from a shuffled copy of `dataset`. Each task contributes one inner
    gradient step on a copy of the model, and the resulting test losses are
    averaged into a single meta-loss that drives the optimizer update.

    Args:
        model: The model to meta-train. The code relies on `model.forward`,
            `model.layers[j].kernel` / `.bias` and
            `model.trainable_variables`.
        epochs: Number of passes over the (reshuffled) dataset.
        dataset: Sequence of task objects; each must provide `batch()`, whose
            result is passed to `np_to_tensor`.
        lr_inner: Step size of the inner (per-task) gradient update.
        batch_size: Not used by this function body; kept so existing callers
            keep working.
        log_steps: Print a progress line every `log_steps` meta-steps.
        num_tasks: Number of tasks combined into one meta-update. When
            `len(dataset)` is not a multiple of `num_tasks`, the last batch
            of an epoch is smaller and is averaged over its actual size.

    Raises:
        ValueError: If `num_tasks` is smaller than 1.
    """
    # ...
    if num_tasks < 1:
        raise ValueError('num_tasks must be a positive integer')
    optimizer = keras.optimizers.Adam()

    # Step 2: instead of checking for convergence, we train for a number
    # of epochs
    for _ in range(epochs):
        total_loss = 0
        losses = []
        start = time.time()
        dataset_shuffled = random.sample(dataset, len(dataset)) #MOD
        # Step 3 and 4
        # Slice the shuffled list instead of reshaping an array so that a
        # dataset whose size is not divisible by num_tasks does not crash.
        for i, offset in enumerate(range(0, len(dataset_shuffled), num_tasks)): #MOD
            batch_of_tasks = dataset_shuffled[offset:offset + num_tasks] #NEW
            with tf.GradientTape() as test_tape:
                test_losses = [] #NEW
                for t in batch_of_tasks: #NEW
                    x, y = np_to_tensor(t.batch())
                    model.forward(x)  # run forward pass to initialize weights
                    # test_tape.watch(model.trainable_variables)
                    # Step 5
                    with tf.GradientTape() as train_tape:
                        train_loss, _ = compute_loss(model, x, y)
                    # Step 6
                    # Taken inside test_tape so the inner gradient itself is
                    # recorded and the meta-gradient can flow through it.
                    gradients = train_tape.gradient(train_loss, model.trainable_variables)
                    model_copy = copy_model(model, x)
                    # NOTE(review): assumes trainable_variables is ordered
                    # [kernel, bias] per layer, as the original indexing did.
                    for j, (layer_copy, layer) in enumerate(zip(model_copy.layers, model.layers)):
                        layer_copy.kernel = tf.subtract(
                            layer.kernel, tf.multiply(lr_inner, gradients[2 * j]))
                        layer_copy.bias = tf.subtract(
                            layer.bias, tf.multiply(lr_inner, gradients[2 * j + 1]))
                    # Step 8
                    test_loss, logits = compute_loss(model_copy, x, y)
                    test_losses.append(test_loss) #NEW
                # Combine the per-task losses while the tape is still
                # recording; divide by the real batch length so a short final
                # batch is averaged correctly.
                meta_loss = sum(test_losses) / len(batch_of_tasks) #NEW
            # Step 8
            gradients = test_tape.gradient(meta_loss, model.trainable_variables) #MOD
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

            # Logs
            total_loss += meta_loss #MOD
            loss = total_loss / (i+1.0)
            losses.append(loss)

            if i % log_steps == 0 and i > 0:
                print('Step {}: loss = {}, Time to run {} steps = {}'.format(i, loss, log_steps, time.time() - start))
                start = time.time()
        # One plot per epoch: `losses` is reset at the top of every epoch.
        plt.plot(losses)
        plt.show()
@lishanwu135 I would be glad if you or somebody else can review the code.
Note: If you train for more than one epoch, you would also need to improve the loss calculations for this function (maybe that's worth opening another issue).