W2NER
W2NER copied to clipboard
关于loss的一点细节问题
首先感谢大佬一直耐心解答,这里还有一个小白问题。
论文用的损失函数是最小化NLLLoss,输出层经过log_softmax,二者组合相当于CE,所以代码里直接用了CrossEntropyLoss。经过debug,在loss计算这里有个细节问题没搞明白,举例来说就是:
对于单个样本 [{"sentence": ["高", "勇", ":", "男", "."], "ner": [{"index": [0, 1], "type": "NAME"}]}]
self.criterion = nn.CrossEntropyLoss()
loss = self.criterion(outputs[grid_mask2d], grid_labels[grid_mask2d])
这里的outputs:
tensor([[
[[-0.1269, 0.0936, 0.0886],
[-0.1313, 0.1275, -0.0575],
[-0.1363, 0.2277, -0.0101],
[-0.1140, 0.1451, -0.0784],
[-0.0398, 0.1617, 0.0216]],
[[-0.0301, 0.1754, -0.0975],
[-0.0598, 0.1516, -0.0733],
[-0.1660, 0.1438, -0.1318],
[-0.0147, 0.1013, -0.0922],
[-0.1157, 0.1345, -0.0949]],
[[-0.0915, 0.1561, -0.0942],
[-0.0473, 0.1095, -0.0061],
[-0.0165, 0.1962, -0.0320],
[-0.0023, 0.1783, -0.0167],
[-0.1187, 0.1059, -0.0661]],
[[-0.0955, 0.1578, -0.0460],
[-0.0712, 0.1429, -0.0485],
[-0.0644, 0.1258, -0.0506],
[-0.0561, 0.1410, -0.0847],
[-0.0998, 0.1425, -0.0439]],
[[-0.0691, 0.1318, -0.0904],
[-0.0887, 0.1824, -0.0507],
[-0.1377, 0.1282, -0.0286],
[-0.0786, 0.1199, -0.0145],
[-0.0994, 0.1300, -0.0485]]]], device='cuda:0',
grad_fn=<AddBackward0>)
grid_labels(金标签):
tensor([[
[0, 1, 0, 0, 0],
[2, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]]], device='cuda:0')
经过 outputs[grid_mask2d] 和 grid_labels[grid_mask2d] 的布尔掩码索引,去掉padding位置后,得到传入CE的input和target:
def forward(self, input: Tensor, target: Tensor) -> Tensor:
return F.cross_entropy(input, target, weight=self.weight,
ignore_index=self.ignore_index, reduction=self.reduction,
label_smoothing=self.label_smoothing)
input:
tensor([
[-0.1269, 0.0936, 0.0886],
[-0.1313, 0.1275, -0.0575],
[-0.1363, 0.2277, -0.0101],
[-0.1140, 0.1451, -0.0784],
[-0.0398, 0.1617, 0.0216],
[-0.0301, 0.1754, -0.0975],
[-0.0598, 0.1516, -0.0733],
[-0.1660, 0.1438, -0.1318],
[-0.0147, 0.1013, -0.0922],
[-0.1157, 0.1345, -0.0949],
[-0.0915, 0.1561, -0.0942],
[-0.0473, 0.1095, -0.0061],
[-0.0165, 0.1962, -0.0320],
[-0.0023, 0.1783, -0.0167],
[-0.1187, 0.1059, -0.0661],
[-0.0955, 0.1578, -0.0460],
[-0.0712, 0.1429, -0.0485],
[-0.0644, 0.1258, -0.0506],
[-0.0561, 0.1410, -0.0847],
[-0.0998, 0.1425, -0.0439],
[-0.0691, 0.1318, -0.0904],
[-0.0887, 0.1824, -0.0507],
[-0.1377, 0.1282, -0.0286],
[-0.0786, 0.1199, -0.0145],
[-0.0994, 0.1300, -0.0485]], device='cuda:0',
grad_fn=<IndexBackward0>)
target:
tensor([0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
这里的CE是怎么计算的?CrossEntropyLoss的标签不是只能取0和1吗?为什么target里还会出现实体类型2这个标签?一直没想明白。