Paddle
Paddle copied to clipboard
【论文复现】反向对齐paddle模型梯度异常,梯度无法正常打印
bug描述 Describe the Bug
反向对齐过程中,打印paddle的模型参数梯度发现前面的几层大量参数的梯度为None
定位到梯度异常的layer以后,使用hook打印梯度发现异常
torch 代码
class MLM(nn.Module):
    """Torch reference implementation of the MLM module.

    Produces a sigmoid attention map from the input features and a
    position embedding, then uses that map to split the input into two
    complementary feature streams which are both passed through the
    shared WCL encoder. Gradient hooks are registered on three
    intermediate tensors so their backward values are printed.
    """

    def __init__(self, n_dim=512):
        """Create the sub-layers.

        Args:
            n_dim: in/out width of ``wv`` and input width of ``we``.
        """
        super().__init__()
        # Construction order is kept as-is so parameter registration
        # (and any RNG-driven initialisation) is unchanged.
        self.MLM_SequenceModeling_mask = Transforme_Encoder(
            n_layers=2, n_position=256
        )
        self.MLM_SequenceModeling_WCL = Transforme_Encoder(
            n_layers=1, n_position=256
        )
        self.pos_embedding = nn.Embedding(25, 512)
        self.w0_linear = nn.Linear(1, 256)
        self.wv = nn.Linear(n_dim, n_dim)
        self.active = nn.Tanh()
        self.we = nn.Linear(n_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input, label_pos, state=False):
        """Run the forward pass.

        Args:
            input: feature tensor fed to the mask encoder and later
                multiplied element-wise with the attention map.
            label_pos: position indices; cast with ``.long()`` before
                the embedding lookup.
            state: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(f_res, f_sub, att_map_sub)``.
        """
        # Transformer unit for generating mask_c.
        feature_v_seq = self.MLM_SequenceModeling_mask(input, src_mask=None)

        # Position embedding, projected by w0_linear and axis-swapped.
        pos_emb = self.pos_embedding(label_pos.long())
        pos_emb = torch.unsqueeze(pos_emb, dim=2)
        pos_emb = self.w0_linear(pos_emb).permute(0, 2, 1)

        # Fuse position embedding with features V and generate mask_c.
        fused = self.active(pos_emb + self.wv(feature_v_seq))
        att_logits = self.we(fused)  # b,256,1
        att_map_sub = self.sigmoid(att_logits.permute(0, 2, 1))  # b,1,256
        # Hook sits on the post-sigmoid attention map.
        att_map_sub.register_hook(
            lambda grad: print('att_map_sub backward', grad)
        )

        att_map_sub_t = att_map_sub.permute(0, 2, 1)
        att_map_sub_t.register_hook(
            lambda grad: print('att_map_sub_t backward', grad)
        )

        # Second path: remaining string; first path: occluded character.
        f_res = input * (1 - att_map_sub_t)
        f_sub = input * att_map_sub_t

        # Transformer units in WCL (shared encoder, f_res first).
        f_res = self.MLM_SequenceModeling_WCL(f_res, src_mask=None)
        # Hook sits on the encoder output of the f_res path.
        f_res.register_hook(lambda grad: print('f_res backward', grad))
        f_sub = self.MLM_SequenceModeling_WCL(f_sub, src_mask=None)

        return f_res, f_sub, att_map_sub
在torch中,f_res backward、att_map_sub_t backward、att_map_sub backward均可正常打印梯度
paddle代码
class MLM(nn.Layer):
    """Paddle port of the MLM model.

    Produces a sigmoid attention map from the input features and a
    position embedding, then uses that map to split the input into two
    complementary feature streams which are both passed through the
    shared WCL encoder. Gradient hooks are registered on three
    intermediate tensors so their backward values are printed.
    """

    def __init__(self, n_dim=512):
        """Create the sub-layers.

        Args:
            n_dim: in/out width of ``wv`` and input width of ``we``.
        """
        super().__init__()
        # Construction order is kept as-is so parameter registration
        # (and any RNG-driven initialisation) is unchanged.
        self.MLM_SequenceModeling_mask = Transforme_Encoder(
            n_layers=2, n_position=256
        )
        self.MLM_SequenceModeling_WCL = Transforme_Encoder(
            n_layers=1, n_position=256
        )
        self.pos_embedding = nn.Embedding(25, 512)
        self.w0_linear = nn.Linear(1, 256)
        self.wv = nn.Linear(n_dim, n_dim)
        self.active = nn.Tanh()
        self.we = nn.Linear(n_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input, label_pos, state=False):
        """Run the forward pass.

        Args:
            input: feature tensor fed to the mask encoder and later
                multiplied element-wise with the attention map.
            label_pos: position indices; converted to an int64 tensor
                before the embedding lookup.
            state: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(f_res_result, f_sub, att_map_sub)``.
        """
        # Transformer unit for generating mask_c.
        feature_v_seq = self.MLM_SequenceModeling_mask(input, src_mask=None)

        # Position embedding, projected by w0_linear and axis-swapped.
        pos_index = paddle.to_tensor(label_pos, dtype="int64")
        pos_emb = self.pos_embedding(pos_index)
        pos_emb = paddle.unsqueeze(pos_emb, axis=2)
        pos_emb = self.w0_linear(pos_emb).transpose((0, 2, 1))

        # Fuse position embedding with features V and generate mask_c.
        fused = self.active(pos_emb + self.wv(feature_v_seq))
        att_logits = self.we(fused)  # b,256,1
        # NOTE: this hook is attached to the pre-sigmoid output of
        # self.we, although it is labelled 'att_map_sub backward'.
        att_logits.register_hook(
            lambda grad: print('att_map_sub backward', grad)
        )
        att_map_sub = self.sigmoid(
            att_logits.transpose(perm=(0, 2, 1))
        )  # b,1,256

        att_map_sub_t = att_map_sub.transpose((0, 2, 1))
        att_map_sub_t.register_hook(
            lambda grad: print('att_map_sub_t backward', grad)
        )

        # Second path with remaining string.
        f_res = input * (1 - att_map_sub_t)
        # Hook sits on f_res before it enters the WCL encoder.
        f_res.register_hook(lambda grad: print('f_res backward', grad))
        # First path with occluded character.
        f_sub = input * att_map_sub_t

        # Transformer units in WCL (shared encoder, f_res first).
        f_res_result = self.MLM_SequenceModeling_WCL(f_res, src_mask=None)
        f_sub = self.MLM_SequenceModeling_WCL(f_sub, src_mask=None)

        return f_res_result, f_sub, att_map_sub
在paddle中,仅有f_res backward和att_map_sub_t backward正常打印了梯度,att_map_sub backward未能正常打印梯度
其他补充信息 Additional Supplementary Information
No response
您好,我们已经收到了您的问题,会安排技术人员尽快解答您的问题,请耐心等待。请您再次检查是否提供了清晰的问题描述、复现代码、环境&版本、报错信息等。同时,您也可以通过查看官网API文档、常见问题、历史Issue、AI社区来寻求解答。祝您生活愉快~
Hi! We've received your issue; please be patient while we arrange for technicians to respond as soon as possible. Please make sure that you have provided enough information to demonstrate your request. You may also check the API documentation, FAQ, GitHub Issues, and the AI community for an answer. Have a nice day!
请问有完整一点的代码吗?我在本地复现一下您的问题
请问有完整一点的代码吗?我在本地复现一下您的问题
我在aistudio上复现一下,等下把地址回复您
很奇怪,在aistudio上复现不出来,怀疑跟操作系统有关系
代码地址: https://aistudio.baidu.com/aistudio/projectdetail/4580735?sUid=48323&shared=1&ts=1663833714209
代码地址: https://aistudio.baidu.com/aistudio/projectdetail/4580735?sUid=48323&shared=1&ts=1663833714209
paddle_grad_windows7_cpu.txt是在本地运行打印的模型参数梯度结果;paddle_grad_gpu.txt是在aistudio中使用GPU运行打印的模型参数梯度结果;paddle_grad_linux_cpu.txt是在aistudio中使用CPU运行打印的模型参数梯度结果