BERT-NER icon indicating copy to clipboard operation
BERT-NER copied to clipboard

How to create pretrained model?

Open heeh opened this issue 5 years ago • 3 comments

Thank you for the fantastic program. I would like to use a custom-trained model as input for the C++ inference program. However, the training program only produces pytorch_model.bin. How do we create bert_features.zip and bert_ner.zip from it?

heeh avatar Dec 26 '19 21:12 heeh

This is my method to generate these files:

import torch
import torch.nn as nn
from transformers import (BertConfig, BertForTokenClassification, BertModel, BertTokenizer)
import torch.nn.functional as F

class bert_script(BertForTokenClassification):
    """BERT encoder wrapper that exposes only the per-token hidden states.

    Meant to be passed to ``torch.jit.trace``: ``forward`` takes plain
    positional tensors and returns a single tensor, so the traced module can
    be saved as the feature extractor (``bert_features.zip``).

    No ``__init__`` override is needed: the parent constructor already builds
    ``self.bert`` and ``self.classifier``. Re-creating ``BertModel(config)``
    here only allocated a second encoder to replace the one the parent had
    just built.
    """

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Run the encoder and return its sequence output.

        Args:
            input_ids: Token id tensor.
            token_type_ids: Optional segment id tensor.
            attention_mask: Optional padding mask tensor.

        Returns:
            The first element of the encoder output (the per-token hidden
            states that ``self.classifier`` consumes).
        """
        # Pass by keyword: ``transformers.BertModel.forward`` takes
        # ``attention_mask`` *before* ``token_type_ids``, so forwarding these
        # two positionally in this wrapper's order silently swaps them.
        encoder_outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            head_mask=None,
        )
        return encoder_outputs[0]

# Export the fine-tuned encoder as a TorchScript module for the C++ side.
# ``path_to_bert_model`` must be defined beforehand and point at the model
# directory (it is expected to contain config.json next to the weights).
config = BertConfig.from_json_file(path_to_bert_model + '/config.json')

# ``from_pretrained`` constructs the model itself, so building a throw-away
# ``bert_script(config)`` first (as was done before) only wasted time and
# memory on a randomly initialised copy.
bert = bert_script.from_pretrained(path_to_bert_model)
# Tracing records whatever the model does on this one call, so make sure it
# is in inference mode (dropout disabled). ``from_pretrained`` is documented
# to return the model in eval mode already; this makes the requirement explicit.
bert.eval()

# One dummy int64 tensor of shape (1, 64) per forward() argument:
# input_ids, token_type_ids, attention_mask.
example_inputs = tuple(torch.zeros(1, 64, dtype=torch.long) for _ in range(3))
bert_features = torch.jit.trace(bert, example_inputs)
# NOTE: the ./base directory is not created here — it must already exist.
bert_features.save('./base/bert_features.zip')

class ner_script(nn.Module):
    """Classification head that maps BERT token features to label probabilities.

    Args:
        classifier: Module that maps per-token features to label logits.
            When omitted, the ``classifier`` layer of the module-level
            ``bert`` model is reused, so the traced head carries the trained
            weights.
    """

    def __init__(self, classifier=None):
        super(ner_script, self).__init__()
        # Only touch the global ``bert`` when no head is supplied; this keeps
        # the old zero-argument call working while letting callers (and
        # tests) inject any head they like.
        self.classifier = bert.classifier if classifier is None else classifier

    def forward(self, sequence_output):
        """Return label probabilities for each token.

        Args:
            sequence_output: Encoder features; the softmax below is taken
                over dimension 2, so a 3-D tensor is expected.

        Returns:
            Tensor of the classifier's output shape whose values along
            dimension 2 are softmax-normalised.
        """
        logits = self.classifier(sequence_output)
        return F.softmax(logits, dim=2)

# Export the classification head as its own TorchScript module.
ner = ner_script()
# Size the dummy feature tensor from the head's input width instead of
# hard-coding 768, so checkpoints with a different hidden size trace too.
hidden_size = ner.classifier.in_features
bert_ner = torch.jit.trace(ner, torch.rand(1, 64, hidden_size))
# NOTE: the ./base directory is not created here — it must already exist.
bert_ner.save('./base/bert_ner.zip')

Here, path_to_bert_model should be the path to the directory of your trained BERT model (the one that contains config.json and the saved weights).

gycg avatar Dec 27 '19 07:12 gycg

Thank you so much.

heeh avatar Dec 29 '19 18:12 heeh

This is my method to generate these files:

import torch
import torch.nn as nn
from transformers import (BertConfig, BertForTokenClassification, BertModel, BertTokenizer)
import torch.nn.functional as F

class bert_script(BertForTokenClassification):
    """BERT encoder wrapper that exposes only the per-token hidden states.

    Meant to be passed to ``torch.jit.trace``: ``forward`` takes plain
    positional tensors and returns a single tensor, so the traced module can
    be saved as the feature extractor (``bert_features.zip``).

    No ``__init__`` override is needed: the parent constructor already builds
    ``self.bert`` and ``self.classifier``. Re-creating ``BertModel(config)``
    here only allocated a second encoder to replace the one the parent had
    just built.
    """

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Run the encoder and return its sequence output.

        Args:
            input_ids: Token id tensor.
            token_type_ids: Optional segment id tensor.
            attention_mask: Optional padding mask tensor.

        Returns:
            The first element of the encoder output (the per-token hidden
            states that ``self.classifier`` consumes).
        """
        # Pass by keyword: ``transformers.BertModel.forward`` takes
        # ``attention_mask`` *before* ``token_type_ids``, so forwarding these
        # two positionally in this wrapper's order silently swaps them.
        encoder_outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            head_mask=None,
        )
        return encoder_outputs[0]

# Export the fine-tuned encoder as a TorchScript module for the C++ side.
# ``path_to_bert_model`` must be defined beforehand and point at the model
# directory (it is expected to contain config.json next to the weights).
config = BertConfig.from_json_file(path_to_bert_model + '/config.json')

# ``from_pretrained`` constructs the model itself, so building a throw-away
# ``bert_script(config)`` first (as was done before) only wasted time and
# memory on a randomly initialised copy.
bert = bert_script.from_pretrained(path_to_bert_model)
# Tracing records whatever the model does on this one call, so make sure it
# is in inference mode (dropout disabled). ``from_pretrained`` is documented
# to return the model in eval mode already; this makes the requirement explicit.
bert.eval()

# One dummy int64 tensor of shape (1, 64) per forward() argument:
# input_ids, token_type_ids, attention_mask.
example_inputs = tuple(torch.zeros(1, 64, dtype=torch.long) for _ in range(3))
bert_features = torch.jit.trace(bert, example_inputs)
# NOTE: the ./base directory is not created here — it must already exist.
bert_features.save('./base/bert_features.zip')

class ner_script(nn.Module):
    """Classification head that maps BERT token features to label probabilities.

    Args:
        classifier: Module that maps per-token features to label logits.
            When omitted, the ``classifier`` layer of the module-level
            ``bert`` model is reused, so the traced head carries the trained
            weights.
    """

    def __init__(self, classifier=None):
        super(ner_script, self).__init__()
        # Only touch the global ``bert`` when no head is supplied; this keeps
        # the old zero-argument call working while letting callers (and
        # tests) inject any head they like.
        self.classifier = bert.classifier if classifier is None else classifier

    def forward(self, sequence_output):
        """Return label probabilities for each token.

        Args:
            sequence_output: Encoder features; the softmax below is taken
                over dimension 2, so a 3-D tensor is expected.

        Returns:
            Tensor of the classifier's output shape whose values along
            dimension 2 are softmax-normalised.
        """
        logits = self.classifier(sequence_output)
        return F.softmax(logits, dim=2)

# Export the classification head as its own TorchScript module.
ner = ner_script()
# Size the dummy feature tensor from the head's input width instead of
# hard-coding 768, so checkpoints with a different hidden size trace too.
hidden_size = ner.classifier.in_features
bert_ner = torch.jit.trace(ner, torch.rand(1, 64, hidden_size))
# NOTE: the ./base directory is not created here — it must already exist.
bert_ner.save('./base/bert_ner.zip')

Here, path_to_bert_model should be the path to the directory of your trained BERT model (the one that contains config.json and the saved weights).

You saved my life! Thanks!

KingAndQueen avatar Oct 27 '21 10:10 KingAndQueen