PIDNet
memory leak only after first epoch
Hi, I’ve run into a problem with the PIDNet model: after the first call to `train()`, some tensors are not freed even after calling `gc.collect()`. After that it seems to stay stable, i.e. my GPU RAM usage only increases after the first epoch, and from then on the tensors appear to be overwritten rather than accumulated. Can you identify which step might still be holding a reference to these tensors?
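The tensor counts below come from `debug_util.print_tensors`, a small debug helper that counts live CUDA tensors. A minimal sketch of what such a helper can look like (the actual implementation may differ in detail) is:

```python
import gc

import torch


def print_tensors():
    """Debug helper sketch: count tensors currently resident on the GPU."""
    count = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) and obj.is_cuda:
                count += 1
        except Exception:
            # some tracked objects raise on attribute access; skip them
            pass
    print('count of tensors on gpu', count)
```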
Output when training on a small debug dataset:
```
count of tensors on gpu 958
Epoch: [0/2] Iter:[0/15], Time: 3.64, lr: [0.00025910131519070223], Loss: 19.601892, Acc:0.023638, Semantic loss: 11.114876, BCE loss: 0.710389, SB loss: 7.776628
Epoch: [0/2] Iter:[10/15], Time: 0.42, lr: [0.00017988190752092693], Loss: 13.189713, Acc:0.052125, Semantic loss: 7.945725, BCE loss: 0.768364, SB loss: 4.475624
count of tensors on gpu 1926
```
```python
import gc
import logging
import time

import torch

# AverageMeter, adjust_learning_rate and debug_util are project helpers
# (utilities from the PIDNet training code plus my own debug module).


def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict, cosine_decay_scheduler=None):
    # Training
    print('training start')
    debug_util.print_tensors()

    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    ave_acc = AverageMeter()
    avg_sem_loss = AverageMeter()
    avg_bce_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    sum_iters = len(trainloader)
    global_steps = writer_dict['train_global_steps']

    for i_iter, batch in enumerate(trainloader, 0):
        images, labels, bd_gts, _, _ = batch
        losses, _, acc, loss_list = model(images.cuda(), labels.long().cuda(), bd_gts.float().cuda())
        loss = losses.mean()
        acc = acc.mean()

        model.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(loss.item())
        ave_acc.update(acc.item())
        avg_sem_loss.update(loss_list[0].mean().item())
        avg_bce_loss.update(loss_list[1].mean().item())

        if cosine_decay_scheduler is not None:
            cosine_decay_scheduler.step(epoch + i_iter / sum_iters)
        else:
            lr = adjust_learning_rate(optimizer,
                                      base_lr,
                                      num_iters,
                                      i_iter + cur_iters)

        if i_iter % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {}, Loss: {:.6f}, Acc:{:.6f}, Semantic loss: {:.6f}, BCE loss: {:.6f}, SB loss: {:.6f}'.format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      batch_time.average(), [x['lr'] for x in optimizer.param_groups], ave_loss.average(),
                      ave_acc.average(), avg_sem_loss.average(), avg_bce_loss.average(),
                      ave_loss.average() - avg_sem_loss.average() - avg_bce_loss.average())
            logging.info(msg)
            del msg  # only defined inside this branch, so delete it here

        del images, labels, bd_gts, loss_list, loss, losses, acc, _, batch
        debug_util.print_tensors()

    writer.add_scalar('train_loss', ave_loss.average(), global_steps)
    writer_dict['train_global_steps'] = global_steps + 1

    del ave_loss, global_steps, writer, ave_acc, avg_sem_loss, avg_bce_loss, writer_dict
    gc.collect()
    torch.cuda.empty_cache()
```
P.S. The `gc.collect()`, `torch.cuda.empty_cache()` and the many `del` statements are only there for debugging purposes.
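For reference, the GPU RAM usage mentioned above can also be tracked with PyTorch's own counters. A minimal helper along these lines (my own sketch, not part of the training code) can be called before and after each `train()` call to confirm that allocated memory only grows between the first and second epoch:

```python
import torch


def log_gpu_memory(tag):
    """Debug helper sketch: print allocated vs. reserved CUDA memory in MiB."""
    allocated = torch.cuda.memory_allocated() / 2**20
    reserved = torch.cuda.memory_reserved() / 2**20
    print(f'{tag}: allocated {allocated:.1f} MiB, reserved {reserved:.1f} MiB')
```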