cgds-package
bug
Hi, I have a problem. Could you help me? Here is my code.

This is my definition of the optimizer; I'm not sure whether there is a problem with it.
```python
class Network(nn.Module):
    def __init__(self, criterion, cfg, **kwargs):
        super(Network, self).__init__()
        self.HRNet = HighResolutionNet(cfg, **kwargs)
        # Load pretrained weights for testing (already trained);
        # comment this line out to train from scratch.
        self.HRNet.init_weights('/mnt/mountA/dzr/segcgd/pretrain/hrnet_w48_pascal_context_cls59_480x480.pth')
        self.CoutHR = self.HRNet.last_inp_channels
        self.dishsi = DisHSI()
        self.respG = RespG()  # Response function: returns the response spectrum already multiplied with the input MSI
        self.hsiG = HSIG()    # Reconstruction network; respG + hsiG together form the generator and are merged below
        # self.RgbG = RGBG()
        self.iter = 1
        # Feature extractor; irrelevant as far as the CGD optimizer is concerned
        self.extractor = Fea_extra(self.CoutHR + 31, cfg.DATASET.NUM_CLASSES)
        # Merge the generator sub-networks for easier training
        self.generator = nn.Sequential(
            self.respG,
            self.hsiG,
        )
        # The CGD optimizer, defined over the whole generator/discriminator pair
        rank = torch.distributed.get_rank()
        self.g = DDP(self.generator.cuda(), device_ids=[rank], broadcast_buffers=False)
        self.d = DDP(self.dishsi.cuda(), device_ids=[rank], broadcast_buffers=False)
        g_reducer = self.g.reducer
        d_reducer = self.d.reducer
        self.cgd_optimizer = CGDs.ACGD(max_params=self.g.parameters(),
                                       min_params=self.d.parameters(),
                                       lr_max=1e-3, lr_min=1e-3,
                                       max_reducer=g_reducer, min_reducer=d_reducer,
                                       tol=1e-4, atol=1e-8)
        # Single-process alternative without the DDP reducers:
        # self.cgd_optimizer = CGDs.ACGD(max_params=itertools.chain(self.hsiG.parameters(), self.respG.parameters()),
        #                                min_params=self.dishsi.parameters(),
        #                                lr_max=1e-3, lr_min=1e-3,
        #                                tol=1e-4, atol=1e-8)
        # Previous setup with separate Adam optimizers:
        # self.gen_optimizer = torch.optim.Adam(itertools.chain(self.hsiG.parameters(), self.respG.parameters(), self.extractor.parameters()), lr=1e-4)
        # self.gen_optimizer = torch.optim.Adam(itertools.chain(self.hsiG.parameters(), self.respG.parameters()), lr=1e-4)  # the generator was too weak (1)
        # self.dis_optimizer = torch.optim.Adam(itertools.chain(self.dishsi.parameters(), self.dishsi_line.parameters()), lr=1e-3)
        # self.dis_optimizer = torch.optim.Adam(self.dishsi.parameters(), lr=1e-6)  # the discriminator was too strong, was lr=5e-4 (2)
        self.criterion_class = criterion
        # self.criterion = nn.BCELoss()
        self.criterion = nn.BCEWithLogitsLoss()  # changed
        self.mean = np.array([0.485, 0.456, 0.406])
        self.std = np.array([0.229, 0.224, 0.225])
        self.gradnorm = GradNorm().apply
```
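To make the question concrete, the core of what I do with the optimizer, stripped of everything else, is roughly this (a simplified sketch of my own code above; `generator`, `discriminator`, and `compute_discriminator_loss` are placeholders, not a runnable script):

```python
# Sketch only: mirrors the construction and calls in the code above.
g = DDP(generator.cuda(), device_ids=[rank], broadcast_buffers=False)
d = DDP(discriminator.cuda(), device_ids=[rank], broadcast_buffers=False)
cgd_optimizer = CGDs.ACGD(max_params=g.parameters(), min_params=d.parameters(),
                          lr_max=1e-3, lr_min=1e-3,
                          max_reducer=g.reducer, min_reducer=d.reducer,
                          tol=1e-4, atol=1e-8)

# Per training iteration:
cgd_optimizer.zero_grad()
loss_d = compute_discriminator_loss(...)  # placeholder for Cal_discriminatorloss below
cgd_optimizer.step(loss_d)                # ACGD is stepped with the loss directly
```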
This is the code for the training loop.
```python
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict, device, Logger=None):
    # Training
    model.train()
    # model.HRNet.eval()
    # model.hsiG.eval()
    # model.respG.eval()
    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    global_steps = writer_dict['train_global_steps']
    rank = get_rank()
    world_size = get_world_size()
    # # Merge the generator sub-networks for training (now done inside the model instead):
    # generator = nn.Sequential(
    #     model.module.respG,
    #     model.module.hsiG,
    # )
    # cgd_optimizer = CGDs.ACGD(max_params=generator.parameters(),
    #                           min_params=model.module.dishsi.parameters(),
    #                           lr_max=1e-3, lr_min=1e-3,
    #                           tol=1e-4, atol=1e-8)
    loss_g = []
    loss_d = []
    GP_List = []
    smooth_List = []
    res_List = []
    # Counters
    gen_train_count = 0
    dis_train_count = 0
    for i_iter, batch in enumerate(trainloader):
        images, labels, _, _, MSI, HSI = batch  # Unpack the items in one batch
        # images, labels, _, _, MSI = batch
        model.zero_grad()  # Zero the gradients
        model.module.cgd_optimizer.zero_grad()
        # cgd_optimizer.zero_grad()
        images = images.to(device).float()  # Move the input (RGB) images to the GPU and cast to float
        MSI = MSI.to(device).float()        # Move the multispectral images (MSI) to the GPU and cast to float
        labels = labels.long().to(device)   # Move the labels to the GPU and cast to long
        HSI = HSI.to(device).float()        # Move the hyperspectral images (HSI) to the GPU and cast to float
        loss, _, _ = model(images, MSI, labels)
        loss_d_, GP_loss = model.module.update_discriminator(MSI=MSI, HSI=HSI, rank=0)  # Update the discriminator (.module because of multi-GPU DDP)
        loss_g_, smooth_loss, res_loss, gen_loss = model.module.update_generator(MSI=MSI, HSI=HSI, img=images, rank=0, seg_label=labels)  # Update the generator loss
        # loss = np.mean(np.array(loss_g_))
        model.module.cgd_optimizer.step(loss_d_)  # CGD update step
        # cgd_optimizer.step(loss_d_)
        loss_d_ = loss_d_.item()
        loss_g.append(loss_g_.item() - gen_loss.item())
        loss_d.append(loss_d_)
        GP_List.append(GP_loss)
        smooth_List.append(smooth_loss)
        res_List.append(res_loss)
        optimizer.step()  # SGD optimizer update
        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()
        # update average loss
        ave_loss.update(loss_g_.item() - gen_loss.item())
        # gen_lr = adjust_learning_rate(model.gen_optimizer, 1e-4, num_iters, 0)
        dis_lr = adjust_learning_rate(optimizer,
                                      base_lr,
                                      num_iters,
                                      i_iter + cur_iters)
```
I think there might be something wrong with these two loss functions, but I'm not sure.
```python
# The following methods belong to the Network class defined above.

def Cal_generatorloss(self, MSI, HSI, rank):
    real_label1 = 1.0
    fake_label1 = 0.0
    one = torch.FloatTensor([1]).to(MSI.device)
    mone = one * -1
    mone = mone.to(one.device)
    b_size, c, h, w = HSI.shape  # Get the dimensions
    real_label = torch.full((b_size,), real_label1, dtype=torch.float, device=HSI.device)
    fake_label = torch.full((b_size,), fake_label1, dtype=torch.float, device=HSI.device)
    real_label2 = torch.full((b_size * h * w,), real_label1, dtype=torch.float, device=HSI.device)
    fake_label2 = torch.full((b_size * h * w,), fake_label1, dtype=torch.float, device=HSI.device)
    # MSI --> Resp
    resp_msi = self.respG(MSI)  # MSI after the response function
    # MSI + resp --> HSI
    fake_HSI, res_1 = self.hsiG(MSI, resp_msi)  # Reconstruction network; returns the fake HSI and a residual res_1
    res_loss = torch.mean(res_1 ** 2)
    # res_loss = torch.mean(torch.abs(res_1))
    # domain loss (2), HSI loss:
    [B, C, H, W] = fake_HSI.shape
    fea = fake_HSI.reshape([B, C, H * W])
    pos = torch.abs(fake_HSI) - fake_HSI          # Penalty on negative values of fake_HSI
    pos = torch.sum(pos, dim=[1, 2, 3]).mean()
    smooth = self.first_order(fea)                # First-order smoothness term
    smooth = torch.mean(smooth ** 2)
    # smooth = torch.mean(torch.abs(smooth))
    # max_value = torch.amax(fake_HSI, dim=[1, 2, 3], keepdim=True)
    mean_value = torch.mean(torch.abs(fake_HSI), dim=[1, 2, 3], keepdim=True)
    mean_value1 = mean_value.detach() + 1e-6
    HSI_pred = self.dishsi(self.gradnorm(fake_HSI, torch.ones(1, device=fake_HSI.device)) / mean_value1)
    HSI_pred = torch.squeeze(HSI_pred)
    HSI_dis_loss = self.criterion(HSI_pred, real_label)
    loss = HSI_dis_loss + res_loss * 1e2 + smooth * 1e1  # + pos * 1e-2
    if self.iter % 20 == 0:
        if rank == 0:
            logging.info('[iter: {}/371][Gen loss:{:.4f}], [HSI adversarial loss:{:.4f}], [res loss:{:.4f}] [smooth loss:{:.4f}] [pos loss:{:.4f}]'.format(
                self.iter % 371, loss.item(), HSI_dis_loss.mean().item(), res_loss.mean().item(), smooth.mean().item(), pos.item()))
    return loss, fake_HSI, smooth.item(), res_loss.item()  # , MSI_pred, HSI_pred.mean().reshape(1)

def Cal_discriminatorloss(self, MSI, HSI, rank):
    real_label1 = 1.0
    fake_label1 = 0.0
    one = torch.FloatTensor([1]).to(MSI.device)
    mone = one * -1
    mone = mone.to(one.device)
    b_size = HSI.size(0)
    # MSI --> Resp
    resp_msi = self.respG(MSI)
    # MSI + resp --> HSI
    fake_HSI, _ = self.hsiG(MSI, resp_msi)
    [B, C, H, W] = fake_HSI.shape
    length = int(B * W * H)
    real_label = torch.full((B,), real_label1, dtype=torch.float, device=HSI.device)
    fake_label = torch.full((B,), fake_label1, dtype=torch.float, device=HSI.device)
    real_label1 = torch.full((length,), real_label1, dtype=torch.float, device=HSI.device)
    fake_label1 = torch.full((length,), fake_label1, dtype=torch.float, device=HSI.device)
    # domain loss (2), HSI loss:
    # max_value = torch.amax(fake_HSI, dim=[1, 2, 3], keepdim=True)
    mean_value = torch.mean(torch.abs(fake_HSI), dim=[1, 2, 3], keepdim=True)
    mean_value1 = mean_value.detach() + 1e-6
    fake_HSI_pred = self.dishsi(self.gradnorm(fake_HSI, torch.ones(1, device=fake_HSI.device)) / mean_value1)
    fake_HSI_pred = torch.squeeze(fake_HSI_pred)
    fake_HSI_loss = self.criterion(fake_HSI_pred, fake_label)
    mean_value = torch.mean(HSI, dim=[1, 2, 3], keepdim=True)
    mean_value2 = mean_value.detach() + 1e-6
    real_HSI_pred = self.dishsi(self.gradnorm(HSI, torch.ones(1, device=HSI.device)) / mean_value2)
    real_HSI_pred = torch.squeeze(real_HSI_pred)
    real_HSI_loss = self.criterion(real_HSI_pred, real_label)
    # GP_loss = self.calc_gradient_penalty(self.dishsi, HSI.detach() / mean_value2, fake_HSI.detach() / mean_value1, center=0, alpha=None, LAMBDA=10, device=real_HSI_pred.device)
    GP_loss = fake_HSI_loss * 0  # gradient penalty disabled
    # loss = fake_HSI_loss + real_HSI_loss  # + GP_loss  # changed
    loss = fake_HSI_loss + real_HSI_loss + GP_loss
    # loss = loss_MSI + fake_MSI_loss + real_MSI_loss + fake_HSIU_pred + HSIU_pred + loss_HSIU + fake_HSI_loss + real_HSI_loss
    self.iter += 1
    if self.iter % 20 == 0:
        if rank == 0:
            logging.info('[iter: {}/371][Dis loss:{:.4f}], [HSI fake loss:{:.2f}, real loss:{:.2f}, GP loss:{:.2f}]'.format(
                self.iter % 371, loss.item(), fake_HSI_loss.item(), real_HSI_loss.item(), GP_loss.item()))
    return loss, GP_loss  # changed

def update_generator(self, MSI, HSI, img, rank, seg_label):
    MSI1 = torch.clone(MSI)  # Make a copy
    # img = self.MSI2img(MSI)
    [B, C, H, W] = HSI.shape  # Used to resize MSI to match HSI
    # spectral feature generation
    # self.gen_optimizer.zero_grad()  # Zero the gradients (separate-optimizer setup)
    MSI1 = torch.nn.functional.interpolate(MSI, size=(int(H), int(W)))  # Resize the tensor
    gen_loss, fake_HSI, smooth, res_loss = self.Cal_generatorloss(MSI1, HSI, rank)  # Generator loss, generated HSI, smoothness loss, residual loss
    # spatial feature generation
    with torch.no_grad():  # No gradients needed; compute the spatial features
        _, fea = self.HRNet(img)  # HRNet features of the RGB image, assigned to fea
    [B, C, H, W] = fea.shape  # Size of the fea features (semantic segmentation of the RGB image)
    # spectral feature generation: gradnorm rescales the gradient norm; resized to match fea
    fake_HSI = torch.nn.functional.interpolate(self.gradnorm(fake_HSI, torch.ones(1, device=fake_HSI.device) * 2), size=(int(H), int(W)))
    pred = self.extractor(Afea=fea, Efea=fake_HSI)  # Feed both features into the extractor to get an intermediate prediction
    loss = self.criterion_class(pred, seg_label)    # Loss against the segmentation labels
    # Save a training .mat file every 400 iterations
    if self.iter % 400 == 0:
        scio.savemat('./savefile/train_iter{}.mat'.format(self.iter),
                     {'RGB': MSI.detach().cpu().numpy(),
                      'GenHSI': fake_HSI.detach().cpu().numpy(),
                      'HSI': HSI.detach().cpu().numpy()})
    return gen_loss + loss, smooth, res_loss, gen_loss

def update_discriminator(self, MSI, HSI, rank):
    # self.dis_optimizer.zero_grad()  # Zero the gradients (separate-optimizer setup)
    MSI = torch.nn.functional.interpolate(MSI, size=(int(128), int(128)))  # Interpolate MSI to size (128, 128)
    dis_loss, GP_loss = self.Cal_discriminatorloss(MSI, HSI, rank)  # Discriminator loss and gradient-penalty loss
    # dis_loss.backward()  # Previously: backprop the discriminator loss; changed because CGD's step() takes the loss directly
    # self.dis_optimizer.step()
    return dis_loss, GP_loss.item()
```
Hi! I have looked at the code above. Right now it has a lot of different things mixed together, which makes it hard to debug. Would it be possible to strip out the unnecessary parts and find a minimal code snippet that reproduces the error?
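For example, something along these lines might already be enough to show the problem (just a sketch with tiny placeholder models; the constructor arguments, including `max_reducer`/`min_reducer`, are copied from your snippet and assume the distributed ACGD variant you are using accepts them):

```python
import torch
import torch.nn as nn
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
import CGDs

def main():
    # Launch with torchrun so the process-group environment variables are set
    dist.init_process_group(backend='nccl')
    rank = dist.get_rank()
    torch.cuda.set_device(rank)

    # Tiny placeholder generator/discriminator standing in for respG/hsiG and DisHSI
    G = nn.Conv2d(3, 31, 3, padding=1).cuda()
    D = nn.Conv2d(31, 1, 3, padding=1).cuda()
    g = DDP(G, device_ids=[rank], broadcast_buffers=False)
    d = DDP(D, device_ids=[rank], broadcast_buffers=False)

    # Same constructor arguments as in the snippet above
    optimizer = CGDs.ACGD(max_params=g.parameters(), min_params=d.parameters(),
                          lr_max=1e-3, lr_min=1e-3,
                          max_reducer=g.reducer, min_reducer=d.reducer,
                          tol=1e-4, atol=1e-8)
    criterion = nn.BCEWithLogitsLoss()

    for _ in range(5):
        msi = torch.randn(2, 3, 64, 64, device='cuda')   # random stand-in for an MSI batch
        hsi = torch.randn(2, 31, 64, 64, device='cuda')  # random stand-in for an HSI batch
        fake_hsi = g(msi)
        pred_fake = d(fake_hsi)
        pred_real = d(hsi)
        # Discriminator loss: fake -> 0, real -> 1, as in Cal_discriminatorloss
        loss_d = criterion(pred_fake, torch.zeros_like(pred_fake)) + \
                 criterion(pred_real, torch.ones_like(pred_real))
        optimizer.zero_grad()
        optimizer.step(loss_d)

if __name__ == '__main__':
    main()
```

If the error still appears with a toy script like this, it will be much easier to track down than inside the full segmentation pipeline.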
Bro, is this optimizer definition correct or not? I'll try removing the unnecessary parts.
Bro, could I add you on WeChat?
Sure. It's better not to post a WeChat ID directly here, though; send it to my email [email protected] instead.