BERT-NER
BERT-NER copied to clipboard
How to create pretrained model?
Thank you for the fantastic program.
I would like to use a custom-trained model as input for the C++ inference program.
However, the training program only produces pytorch_model.bin.
How do we create bert_features.zip and bert_ner.zip?
This is my method to generate these files:
import torch
import torch.nn as nn
from transformers import (BertConfig, BertForTokenClassification, BertModel, BertTokenizer)
import torch.nn.functional as F
class bert_script(BertForTokenClassification):
    """Token-classification BERT whose forward pass returns only encoder output.

    The class is loaded with ``from_pretrained`` like its parent, but
    ``forward`` stops after the encoder so that the encoder can be traced and
    saved on its own; the ``classifier`` attribute inherited from the parent
    is exported separately.
    """

    def __init__(self, config):
        """Build the model from a ``BertConfig``."""
        super().__init__(config)
        # NOTE(review): the parent constructor presumably creates ``self.bert``
        # already; it is re-created here exactly as in the original script.
        self.bert = BertModel(config)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Run the encoder and return the first element of its output.

        Args:
            input_ids: Token id tensor fed to the encoder.
            token_type_ids: Optional segment id tensor.
            attention_mask: Optional attention mask tensor.

        Returns:
            Element ``[0]`` of the ``BertModel`` output (the per-token
            sequence output).
        """
        # Pass the optional inputs by keyword. ``BertModel.forward`` in
        # ``transformers`` takes ``attention_mask`` *before*
        # ``token_type_ids``, so passing them positionally in this method's
        # order would silently swap the two tensors.
        sequence_output = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            head_mask=None,
        )[0]
        return sequence_output
# Load the fine-tuned checkpoint exactly once. The original script first built
# a randomly initialised ``bert_script(config)`` and immediately replaced it
# with the ``from_pretrained`` result, which only wasted time and memory.
config = BertConfig.from_json_file(path_to_bert_model + '/config.json')
bert = bert_script.from_pretrained(path_to_bert_model, config=config)

# torch.jit.trace records the operations executed for the example inputs, so
# make sure dropout is disabled before tracing.
bert.eval()

# Example inputs in the order of bert_script.forward:
# (input_ids, token_type_ids, attention_mask), each of shape (1, 64).
example_inputs = tuple(torch.zeros(1, 64, dtype=torch.long) for _ in range(3))
bert_features = torch.jit.trace(bert, example_inputs)

# NOTE(review): assumes the ./base directory already exists - confirm.
bert_features.save('./base/bert_features.zip')
class ner_script(nn.Module):
    """Classification head that maps encoder output to per-token probabilities.

    Args:
        classifier: Module applied to the encoder output to produce logits.
            When omitted, the ``classifier`` attribute of the module-level
            ``bert`` model is used, which is what the export script relies on.
    """

    def __init__(self, classifier=None):
        super().__init__()
        # Fall back to the head of the already-loaded model so that a plain
        # ``ner_script()`` call keeps working; an explicit module can be
        # passed to reuse this class without the ``bert`` global.
        self.classifier = bert.classifier if classifier is None else classifier

    def forward(self, sequence_output):
        """Return softmax probabilities over axis 2 of the classifier logits.

        Args:
            sequence_output: Tensor passed to ``self.classifier``; it is
                traced in this script with shape (1, 64, 768).

        Returns:
            Tensor of the same shape as the logits, normalised along dim 2.
        """
        logits = self.classifier(sequence_output)
        return F.softmax(logits, dim=2)
ner = ner_script()
# Tracing records the ops run on the example input; keep the module in
# inference mode while doing so.
ner.eval()

# Size the example input from the head itself instead of hard-coding 768, so
# the export also works for encoders with a different hidden size.
# NOTE(review): assumes ``ner.classifier`` is an ``nn.Linear`` - confirm.
hidden_size = ner.classifier.in_features
bert_ner = torch.jit.trace(ner, torch.rand(1, 64, hidden_size))

# NOTE(review): assumes the ./base directory already exists - confirm.
bert_ner.save('./base/bert_ner.zip')
path_to_bert_model should be the path to your BERT model directory (the one containing config.json and the trained weights).
Thank you so much.
This is my method to generate these files:
import torch
import torch.nn as nn
from transformers import (BertConfig, BertForTokenClassification, BertModel, BertTokenizer)
import torch.nn.functional as F


class bert_script(BertForTokenClassification):
    """Token-classification BERT whose forward pass returns only encoder output."""

    def __init__(self, config):
        super().__init__(config)
        # NOTE(review): the parent constructor presumably creates ``self.bert``
        # already; it is re-created here exactly as in the original script.
        self.bert = BertModel(config)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Run the encoder and return element ``[0]`` of its output."""
        # Keyword arguments are required here: ``BertModel.forward`` in
        # ``transformers`` takes ``attention_mask`` before ``token_type_ids``,
        # so positional passing in this order would swap the two tensors.
        sequence_output = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            head_mask=None,
        )[0]
        return sequence_output


# Load the fine-tuned checkpoint once (no throw-away random model first).
config = BertConfig.from_json_file(path_to_bert_model + '/config.json')
bert = bert_script.from_pretrained(path_to_bert_model, config=config)
# Tracing records the executed ops, so dropout must be disabled.
bert.eval()

# Example inputs: (input_ids, token_type_ids, attention_mask), each (1, 64).
example_inputs = tuple(torch.zeros(1, 64, dtype=torch.long) for _ in range(3))
bert_features = torch.jit.trace(bert, example_inputs)
# NOTE(review): assumes the ./base directory already exists - confirm.
bert_features.save('./base/bert_features.zip')


class ner_script(nn.Module):
    """Classification head that maps encoder output to per-token probabilities.

    Args:
        classifier: Module producing logits from the encoder output. Defaults
            to the ``classifier`` attribute of the module-level ``bert``.
    """

    def __init__(self, classifier=None):
        super().__init__()
        self.classifier = bert.classifier if classifier is None else classifier

    def forward(self, sequence_output):
        """Return softmax probabilities over axis 2 of the classifier logits."""
        logits = self.classifier(sequence_output)
        return F.softmax(logits, dim=2)


ner = ner_script()
ner.eval()
# Derive the example width from the head rather than hard-coding 768.
# NOTE(review): assumes ``ner.classifier`` is an ``nn.Linear`` - confirm.
hidden_size = ner.classifier.in_features
bert_ner = torch.jit.trace(ner, torch.rand(1, 64, hidden_size))
bert_ner.save('./base/bert_ner.zip')
path_to_bert_model should be the path to your BERT model directory (the one containing config.json and the trained weights).
You saved my life! Thanks!