Neural-GC
cLSTM train_model_ista() training loop
Hi @iancovert, thank you for your work and for sharing the code!

I am going through the code in cLSTM.py and have a question about how `train_model_ista()` is written. I wonder why the smooth error is first calculated outside the `for it in range(max_iter):` loop (and then recalculated at the end of each iteration). Would the function behave the same if it were written as follows, with the calculation moved to the top of the loop? (See also the small toy comparison after the code.)
```python
def train_model_ista(clstm, X, context, lr, max_iter, lam=0, lam_ridge=0,
                     lookback=5, check_every=50, verbose=1):
    p = X.shape[-1]
    loss_fn = nn.MSELoss(reduction='mean')
    train_loss_list = []

    # Set up data.
    X, Y = zip(*[arrange_input(x, context) for x in X])
    X = torch.cat(X, dim=0)
    Y = torch.cat(Y, dim=0)

    # For early stopping.
    best_it = None
    best_loss = np.inf
    best_model = None

    for it in range(max_iter):
        # Calculate smooth error.
        pred = [clstm.networks[i](X)[0] for i in range(p)]
        loss = sum([loss_fn(pred[i][:, :, 0], Y[:, :, i]) for i in range(p)])
        ridge = sum([ridge_regularize(net, lam_ridge) for net in clstm.networks])
        smooth = loss + ridge

        # Take gradient step.
        smooth.backward()
        for param in clstm.parameters():
            param.data -= lr * param.grad

        # Take prox step.
        if lam > 0:
            for net in clstm.networks:
                prox_update(net, lam, lr)

        clstm.zero_grad()

        # Check progress.
        if (it + 1) % check_every == 0:
            # Add nonsmooth penalty.
            nonsmooth = sum([regularize(net, lam) for net in clstm.networks])
            mean_loss = (smooth + nonsmooth) / p
            train_loss_list.append(mean_loss.detach())

            if verbose > 0:
                print(('-' * 10 + 'Iter = %d' + '-' * 10) % (it + 1))
                print('Loss = %f' % mean_loss)
                print('Variable usage = %.2f%%'
                      % (100 * torch.mean(clstm.GC().float())))

            # Check for early stopping.
            if mean_loss < best_loss:
                best_loss = mean_loss
                best_it = it
                best_model = deepcopy(clstm)
            elif (it - best_it) == lookback * check_every:
                if verbose:
                    print('Stopping early')
                break

    # Restore best model.
    restore_parameters(clstm, best_model)

    return train_loss_list
```
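To make the difference concrete, here is a small self-contained toy I put together (not code from the repo; `A`, `b`, `smooth_loss`, and `prox` are just stand-ins for the forward pass, the ridge penalty, and `prox_update`) that runs an ISTA loop with both orderings:

```python
# Toy comparison: compute the smooth loss once before the loop and again at
# the end of each iteration ('original'), vs. at the top of each iteration
# ('proposed'). Uses a tiny lasso problem in place of the cLSTM.
import torch

torch.manual_seed(0)
A = torch.randn(20, 5)
b = torch.randn(20)
lr, lam, max_iter = 0.01, 0.1, 100

def smooth_loss(w):
    return ((A @ w - b) ** 2).mean()

def prox(w, lam, lr):
    # Soft-thresholding, standing in for prox_update.
    with torch.no_grad():
        w.copy_(torch.sign(w) * torch.clamp(w.abs() - lr * lam, min=0))

def run(order):
    w = torch.zeros(5, requires_grad=True)
    logged = []
    smooth = smooth_loss(w) if order == 'original' else None
    for it in range(max_iter):
        if order == 'proposed':
            smooth = smooth_loss(w)  # compute at the top of the loop
        smooth.backward()
        with torch.no_grad():
            w -= lr * w.grad
        prox(w, lam, lr)
        w.grad = None
        if order == 'original':
            smooth = smooth_loss(w)  # recompute for the next iteration
        if (it + 1) % 50 == 0:
            logged.append(smooth.item())  # what the progress check sees
    return w.detach(), logged

w1, log1 = run('original')
w2, log2 = run('proposed')
print(torch.allclose(w1, w2))  # True: identical parameter updates
print(log1, log2)              # logs differ: post- vs. pre-update loss
```

If I am reading this right, the gradient and prox steps are identical either way; the only difference I can see is that in my version the loss recorded at each `check_every` point (and compared for early stopping) is the pre-update loss, so it lags the saved `best_model` by one step. Is that the reason for computing the smooth error outside the loop first, or am I missing something?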