MAML-Pytorch
Accuracy does not change
I am using ALBERT plus a Siamese network to train a subjective-question grading model, with a training strategy based on your code; the Siamese network consists of a bidirectional LSTM and fully connected layers. During training the accuracy never improves, it stays constant throughout. It feels as if the weights are not being updated, possibly because the gradients are too small to change them noticeably. Or there may be a problem with the training strategy itself, but I am not sure of the exact cause. Here is my training code:
```python
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import OrderedDict

from transformers import AlbertModel


class MetaTask(nn.Module):
    def __init__(self, args):
        super(MetaTask, self).__init__()
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.loss_fn = nn.CrossEntropyLoss()
        self.update_lr = args.update_lr
        self.meta_lr = args.meta_lr
        self.finetunning_lr = args.finetunning_lr
        self.n_way = args.n_way
        self.k_spt = args.k_spt
        self.k_qry = args.k_qry
        self.task_num = args.task_num
        self.update_step = args.update_step
        self.update_step_test = args.update_step_test
        self.net = SubjectiveGradingModel().to(self.device)
        self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)

    def forward(self, support_x, support_y, query_x, query_y):
        task_num = len(support_x)
        querysz = len(query_x[0])
        losses_q = [0 for _ in range(self.update_step + 1)]  # query loss per update step
        corrects = [0 for _ in range(self.update_step + 1)]
        for i in range(task_num):
            self.net.train()
            # 1. run the i-th task and compute loss for k=0
            logits = self.net(support_x[i])
            loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
            fast_weights = OrderedDict(self.net.named_parameters())
            grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
            # debug: print any parameter whose gradient is None
            # for (name, param), gra in zip(self.net.named_parameters(), grad):
            #     if gra is None:
            #         print("parameter with None gradient:", name)
            fast_weights = OrderedDict(
                (name, param - self.update_lr * grad)
                for ((name, param), grad) in zip(fast_weights.items(), grad)
            )
            # this is the loss and accuracy before the first update
            with torch.no_grad():
                self.net.eval()
                logits_q = self.net(query_x[i])
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[0] += loss_q
                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
                corrects[0] = corrects[0] + correct
            # this is the loss and accuracy after the first update
            with torch.no_grad():
                self.net.eval()
                self.net.load_state_dict(fast_weights, strict=False)
                logits_q = self.net(query_x[i])
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[1] += loss_q
                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
                corrects[1] = corrects[1] + correct
            self.net.train()
            for k in range(1, self.update_step):
                # 1. run the i-th task and compute loss for k=1~K-1
                self.net.load_state_dict(fast_weights, strict=False)
                logits = self.net(support_x[i])
                loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
                # 2. compute grad on theta_pi
                fast_weights = OrderedDict(self.net.named_parameters())
                grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
                # 3. theta_pi = theta_pi - train_lr * grad
                fast_weights = OrderedDict(
                    (name, param - self.update_lr * grad)
                    for ((name, param), grad) in zip(fast_weights.items(), grad)
                )
                self.net.load_state_dict(fast_weights, strict=False)
                logits_q = self.net(query_x[i])
                # loss_q will be overwritten and just keeps the loss_q of the last update step
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[k + 1] += loss_q
                with torch.no_grad():
                    pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
                    corrects[k + 1] = corrects[k + 1] + correct
        # meta-update: average the last-step query loss over all tasks
        loss_q = losses_q[-1] / task_num
        # optimize theta parameters
        self.meta_optim.zero_grad()
        loss_q.backward(retain_graph=True)
        # print('meta update')
        self.meta_optim.step()
        accs = np.array(corrects) / (querysz * task_num)
        return accs
class SubjectiveGradingModel(nn.Module):
    def __init__(self, hidden_size=384):
        super(SubjectiveGradingModel, self).__init__()
        # load the pretrained ALBERT model
        self.bert = AlbertModel.from_pretrained('src/datamoudle/model/albert_chinese_small')
        # Siamese network (bidirectional LSTM + fully connected layers, defined elsewhere)
        self.siamese_network = Siamese(max_length=378, embedding_size=hidden_size)

    def forward(self, input_data, weights=None):
        # split the fields of each sample dict into separate lists
        input_ids_list = [item['input_ids'].squeeze(0).squeeze(0) for item in input_data]
        token_type_ids_list = [item['token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
        attention_mask_list = [item['attention_mask'].squeeze(0).squeeze(0) for item in input_data]
        answer_input_ids_list = [item['answer_input_ids'].squeeze(0).squeeze(0) for item in input_data]
        answer_token_type_ids_list = [item['answer_token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
        answer_attention_mask_list = [item['answer_attention_mask'].squeeze(0).squeeze(0) for item in input_data]
        # stack into batched PyTorch tensors
        input_ids = torch.stack(input_ids_list)
        token_type_ids = torch.stack(token_type_ids_list)
        attention_mask = torch.stack(attention_mask_list)
        answer_input_ids = torch.stack(answer_input_ids_list)
        answer_token_type_ids = torch.stack(answer_token_type_ids_list)
        answer_attention_mask = torch.stack(answer_attention_mask_list)
        # encode the student answer and the reference answer with the shared ALBERT
        outputs = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state
        cls_output = outputs.pooler_output
        outputs_answer = self.bert(input_ids=answer_input_ids, token_type_ids=answer_token_type_ids, attention_mask=answer_attention_mask)
        pooled_output_answer = outputs_answer.last_hidden_state
        cls_output_answer = outputs_answer.pooler_output
        # compare the two encodings with the Siamese head
        siamese_output = self.siamese_network(pooled_output, pooled_output_answer, cls_output, cls_output_answer)
        return siamese_output
```

What could be causing this?
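To test whether the weights are actually being updated, one sanity check is to snapshot the parameters before a meta-step and compare them afterwards. A minimal sketch (the helper and the usage names are my own, not code from the repo):

```python
import torch

def check_meta_update(model, run_step):
    """Snapshot parameters, run one meta-step via run_step(), then
    report any parameter that did not move (hypothetical helper)."""
    before = {name: p.detach().clone() for name, p in model.named_parameters()}
    run_step()
    unchanged = [name for name, p in model.named_parameters()
                 if torch.equal(p.detach(), before[name])]
    print(f"{len(before) - len(unchanged)}/{len(before)} parameters moved")
    for name in unchanged:
        print("no update:", name)

# usage sketch, assuming one meta-batch of tasks is already prepared:
# meta = MetaTask(args)
# check_meta_update(meta.net, lambda: meta(support_x, support_y, query_x, query_y))
```

If every parameter moves but accuracy still stays flat, the problem is more likely in the data or labels than in the optimizer.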
Bro, did you ever solve this? I ran into the same problem: training with this strategy, the accuracy barely changes.
My problem turned out to be with the dataset, not the model. Some punctuation in the dataset was wrong at the time; after I cleaned it up, everything worked.
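For anyone hitting the same thing: the kind of cleanup I mean is normalizing inconsistent punctuation before tokenization. A rough sketch (the mapping below is illustrative, not my exact one):

```python
import unicodedata

# illustrative mapping between Chinese/curly and ASCII punctuation;
# adjust to whatever inconsistencies your own dataset contains
PUNCT_MAP = str.maketrans({
    '“': '"', '”': '"', '‘': "'", '’': "'",
    ',': ',', ':': ':', ';': ';', '?': '?', '!': '!',
    '(': '(', ')': ')',
})

def normalize_text(text: str) -> str:
    text = unicodedata.normalize('NFKC', text)  # folds most full-width forms
    return text.translate(PUNCT_MAP)
```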