Transformers-Tutorials
Size mismatch when loading pretrained weights to retrain LayoutLMv2
```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-23-2ca38188b5f5> in <module>()
      4 
      5 model = LayoutLMv2ForTokenClassification.from_pretrained('/content/drive/MyDrive/resume_experiment/checkpoints',
----> 6                                                          num_labels=len(labels))
      7 
      8 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

1 frames
/usr/local/lib/python3.7/dist-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   1363                 pretrained_model_name_or_path,
   1364                 ignore_mismatched_sizes=ignore_mismatched_sizes,
-> 1365                 _fast_init=_fast_init,
   1366             )
   1367 

/usr/local/lib/python3.7/dist-packages/transformers/modeling_utils.py in _load_state_dict_into_model(cls, model, state_dict, pretrained_model_name_or_path, ignore_mismatched_sizes, _fast_init)
   1510         if len(error_msgs) > 0:
   1511             error_msg = "\n\t".join(error_msgs)
-> 1512             raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
   1513 
   1514         if len(unexpected_keys) > 0:

RuntimeError: Error(s) in loading state_dict for LayoutLMv2ForTokenClassification:
	size mismatch for classifier.weight: copying a param with shape torch.Size([47, 768]) from checkpoint, the shape in current model is torch.Size([48, 768]).
	size mismatch for classifier.bias: copying a param with shape torch.Size([47]) from checkpoint, the shape in current model is torch.Size([48]).
```
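The traceback says the checkpoint on disk was saved with a 47-way classifier head, while the current `labels` list produces a 48-way head. This can be confirmed by inspecting the saved weights directly; a minimal sketch, assuming the checkpoint directory contains the default `pytorch_model.bin` file written by `save_pretrained()`:

```python
import torch

# Load only the raw state dict from the saved checkpoint (no model needed).
# Assumes the default weights filename used by save_pretrained().
state_dict = torch.load(
    '/content/drive/MyDrive/resume_experiment/checkpoints/pytorch_model.bin',
    map_location='cpu',
)

# The first dimension of the classifier head equals the number of labels
# the model was originally trained with (47 here, per the traceback).
print(state_dict['classifier.weight'].shape)  # expected: torch.Size([47, 768])
print(state_dict['classifier.bias'].shape)    # expected: torch.Size([47])
```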
This is the error I get when loading the previously trained weights of my model to retrain it on new data. You can see in the code below how I changed the loading of the pretrained weights; is this the correct way to do it?
```python
from transformers import LayoutLMv2ForTokenClassification, AdamW
import torch
from tqdm.notebook import tqdm

model = LayoutLMv2ForTokenClassification.from_pretrained(
    '/content/drive/MyDrive/resume_experiment/checkpoints',
    num_labels=len(labels))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = AdamW(model.parameters(), lr=5e-5)

global_step = 0
num_train_epochs = 10

# put the model in training mode
model.train()
for epoch in range(num_train_epochs):
    print("Epoch:", epoch)
    for batch in tqdm(train_dataloader):
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        bbox = batch['bbox'].to(device)
        image = batch['image'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        labels = batch['labels'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(input_ids=input_ids,
                        bbox=bbox,
                        image=image,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        labels=labels)
        loss = outputs.loss

        # print loss every 100 steps
        if global_step % 100 == 0:
            print(f"Loss after {global_step} steps: {loss.item()}")

        loss.backward()
        optimizer.step()
        global_step += 1

model.save_pretrained("/content/drive/MyDrive/resume_experiment/checkpoints_2")
```
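In case it helps: since the label count changed between runs (47 in the checkpoint vs. 48 now), `from_pretrained` cannot copy the old classifier head into the new model. The method accepts an `ignore_mismatched_sizes` argument (visible in the traceback itself) that skips the incompatible head weights and reinitializes them at the new size; a minimal sketch of the reload:

```python
from transformers import LayoutLMv2ForTokenClassification

# Reload the checkpoint, discarding the old 47-way classifier head and
# randomly reinitializing a new 48-way head for the updated label set.
# The backbone weights are still loaded from the checkpoint; only the
# mismatched head is reset, so it needs to be retrained on the new data.
model = LayoutLMv2ForTokenClassification.from_pretrained(
    '/content/drive/MyDrive/resume_experiment/checkpoints',
    num_labels=len(labels),
    ignore_mismatched_sizes=True,
)
```

Unrelatedly, note that `labels` is reassigned to a batch tensor inside the training loop, so `len(labels)` would no longer be the label count if the cell is run a second time; using a separately named list for the label set avoids that.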