Transformers-Tutorials
RunTimeError when creating spatial position embeddings
@NielsRogge Hi Niels, the issue I am facing when running the LayoutLMv3 model on the FUNSD dataset is the following runtime error:

    return forward_call(*args, **kwargs)
  File "/Users/im/Documents/projects_dl/ocr_layoutlm/myenv1/lib/python3.9/site-packages/transformers-4.32.0.dev0-py3.9.egg/transformers/models/layoutlmv3/modeling_layoutlmv3.py", line 345, in forward
    embeddings = embeddings + spatial_position_embeddings
RuntimeError: The size of tensor a (512) must match the size of tensor b (4) at non-singleton dimension 2
I have created a custom dataset for the FUNSD annotations and images as below:
class CustomFunsdDataSet(Dataset):
    def __init__(self, filepath, processor):
        self.words = []
        self.bboxes = []
        self.images = []
        self.word_labels = []

        self.target_labels = ["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"]
        self.id2label = {k: v for k, v in enumerate(self.target_labels)}
        self.label2id = {v: k for k, v in enumerate(self.target_labels)}
        print(self.label2id)

        for _, example in Funsd().generate_examples(filepath):
            self.words.append(example["tokens"])
            self.bboxes.append(example["bboxes"])
            self.images.append(example["image"])
            self.word_labels.append([self.label2id[tag] for tag in example["ner_tags"]])

        self.encoding = processor(images=self.images, text=self.words, boxes=self.bboxes, word_labels=self.word_labels,
                                  max_length=512,
                                  padding="max_length",
                                  truncation="longest_first", return_tensors='pt')
        print(f"preprocessor result:\n {type(self.encoding)}, {self.encoding.keys()}")

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index) -> dict:
        return {
            "input_ids": torch.tensor(self.encoding["input_ids"][index], dtype=torch.int64),
            "attention_mask": torch.tensor(self.encoding["attention_mask"][index], dtype=torch.int64),
            "bbox": torch.tensor(self.encoding["bbox"], dtype=torch.int64),
            "pixel_values": torch.tensor(self.encoding['pixel_values'], dtype=torch.float32),
            "labels": torch.tensor(self.encoding['labels'], dtype=torch.int64)
        }
train_dataset = CustomFunsdDataSet(os.path.join(path, train), processor)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=2)
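To make the problem easier to see, here is a quick shape check I can run on one batch from the dataloader (the expected shapes in the comments are my understanding of what LayoutLMv3ForTokenClassification wants, so please correct me if that is wrong):

# Pull a single batch and print every tensor's shape.
# My understanding is that LayoutLMv3ForTokenClassification expects
# input_ids / attention_mask / labels of shape (batch_size, 512),
# bbox of shape (batch_size, 512, 4) and
# pixel_values of shape (batch_size, 3, 224, 224).
batch = next(iter(train_dataloader))
for key, value in batch.items():
    print(key, tuple(value.shape))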
The processor is defined as:
feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=False)
tokenizer = LayoutLMv3TokenizerFast.from_pretrained(
    "/Users/madhavim/Documents/projects_dl/ocr_layoutlm/src_layoutlmv3/model_layoutlmv3",
    ignore_mismatched_sizes=True)
processor = LayoutLMv3Processor(tokenizer=tokenizer, feature_extractor=feature_extractor)
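As a sanity check on the processor itself, this is how I would encode a single document and inspect the output (just a sketch; image, words, boxes and word_labels here are placeholders for the data of one example from Funsd().generate_examples, the same fields used in the dataset class above):

# image/words/boxes/word_labels: one example's data (placeholders for illustration).
# With return_tensors='pt' and a single image, every value should come back
# with a leading batch dimension of 1 (e.g. bbox as (1, 512, 4),
# pixel_values as (1, 3, 224, 224)).
single_encoding = processor(images=image, text=words, boxes=boxes,
                            word_labels=word_labels,
                            max_length=512, padding="max_length",
                            truncation=True, return_tensors="pt")
for key, value in single_encoding.items():
    print(key, tuple(value.shape))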
Upon training the LayoutLMv3ForTokenClassification model, a runtime error about a size mismatch is thrown when the spatial position embeddings are created:
File "/Users/im/Documents/projects_dl/ocr_layoutlm/myenv1/lib/python3.9/site-packages/transformers-4.32.0.dev0-py3.9.egg/transformers/models/layoutlmv3/modeling_layoutlmv3.py", line 345, in forward embeddings = embeddings + spatial_position_embeddings RuntimeError: The size of tensor a (512) must match the size of tensor b (4) at non-singleton dimension 2
I am not sure how to resolve this issue. Looking forward to your help. Thanks,