
AttributeError: 'LayoutLMForTokenClassification' object has no attribute 'config'


System Info

Adding image embeddings to LayoutLM makes the model unconvertible to ONNX. After following https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Add_image_embeddings_to_LayoutLM.ipynb, I wanted to convert the resulting .pt model to ONNX, but the changes made in the notebook break the conversion.

New model:

    import torch.nn as nn
    from transformers.models.layoutlm import LayoutLMModel, LayoutLMConfig
    from transformers.modeling_outputs import TokenClassifierOutput
    import torchvision
    from torchvision.ops import RoIAlign

    class LayoutLMForTokenClassification(nn.Module):
        def __init__(self, output_size=(3, 3), spatial_scale=14/224, sampling_ratio=2):
            super().__init__()

            # LayoutLM base model + token classifier
            self.num_labels = len(label2idx)  # label2idx is defined earlier in the notebook
            self.layoutlm = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=self.num_labels)
            self.dropout = nn.Dropout(self.layoutlm.config.hidden_dropout_prob)
            self.classifier = nn.Linear(self.layoutlm.config.hidden_size, self.num_labels)

            # backbone + roi-align + projection layer
            model = torchvision.models.resnet101(pretrained=True)
            self.backbone = nn.Sequential(*(list(model.children())[:-3]))
            self.roi_align = RoIAlign(output_size, spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)
            self.projection = nn.Linear(in_features=1024*3*3, out_features=self.layoutlm.config.hidden_size)

        def forward(
            self,
            input_ids,
            bbox,
            attention_mask,
            token_type_ids,
            position_ids=None,
            head_mask=None,
            inputs_embeds=None,
            labels=None,
            resized_images=None,  # shape (N, C, H, W), with H = W = 224
            resized_and_aligned_bounding_boxes=None,  # single torch tensor that also contains the batch index for every bbox at image size 224
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,
        ):
            r"""
            labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
                Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels - 1]``.
            """
            return_dict = return_dict if return_dict is not None else self.layoutlm.config.use_return_dict

            # first, forward pass on LayoutLM
            outputs = self.layoutlm(
                input_ids=input_ids,
                bbox=bbox,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                position_ids=position_ids,
                head_mask=head_mask,
                inputs_embeds=inputs_embeds,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )

            sequence_output = outputs[0]

            # next, send resized images of shape (batch_size, 3, 224, 224) through the backbone
            # to get feature maps of shape (batch_size, 1024, 14, 14)
            feature_maps = self.backbone(resized_images)

            # next, use roi align to get feature maps of individual (resized and aligned) bounding boxes
            # shape (batch_size*seq_len, 1024, 3, 3)
            device = input_ids.device
            resized_bounding_boxes_list = []
            for i in resized_and_aligned_bounding_boxes:
                resized_bounding_boxes_list.append(i.float().to(device))

            # we pass in a list of tensors; -0.5 was added to the first two coordinates
            # and +0.5 to the last two coordinates,
            # see https://stackoverflow.com/questions/60060016/why-does-roi-align-not-seem-to-work-in-pytorch
            feat_maps_bboxes = self.roi_align(input=feature_maps, rois=resized_bounding_boxes_list)

            # next, reshape + project to the same dimension as LayoutLM
            batch_size = input_ids.shape[0]
            seq_len = input_ids.shape[1]
            feat_maps_bboxes = feat_maps_bboxes.view(batch_size, seq_len, -1)  # shape (batch_size, seq_len, 1024*3*3)
            projected_feat_maps_bboxes = self.projection(feat_maps_bboxes)  # shape (batch_size, seq_len, hidden_size)

            # add those to the sequence_output - shape (batch_size, seq_len, hidden_size)
            sequence_output += projected_feat_maps_bboxes

            sequence_output = self.dropout(sequence_output)
            logits = self.classifier(sequence_output)

            loss = None
            if labels is not None:
                loss_fct = nn.CrossEntropyLoss()

                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = logits.view(-1, self.num_labels)[active_loss]
                    active_labels = labels.view(-1)[active_loss]
                    loss = loss_fct(active_logits, active_labels)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            if not return_dict:
                output = (logits,) + outputs[2:]
                return ((loss,) + output) if loss is not None else output

            return TokenClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )
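
Note that, unlike the stock LayoutLMForTokenClassification in transformers, this class subclasses nn.Module directly, so it does not inherit the PreTrainedModel conveniences such as a config attribute. Attribute lookup on an nn.Module only falls back to registered parameters, buffers and submodules. A tiny self-contained illustration (hypothetical class, unrelated to LayoutLM):

    import torch.nn as nn

    class Toy(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(2, 2)  # registered submodule

    toy = Toy()
    print(hasattr(toy, "linear"))  # True  - registered submodule is found
    print(hasattr(toy, "config"))  # False - never assigned, so __getattr__ raises AttributeError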

Error:

  3 from transformers.onnx import export
  4 def save_onnx(save_path):
  5         onnx_config = LayoutLMOnnxConfig(model.config)
  6         export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path),opset=11)

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)

    1206             return modules[name]
    1207         raise AttributeError("'{}' object has no attribute '{}'".format(
    1208             type(self).__name__, name))
    1209
    1210     def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:

AttributeError: 'LayoutLMForTokenClassification' object has no attribute 'config'
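
From the traceback, the failing lookup is model.config in onnx_config = LayoutLMOnnxConfig(model.config): the modified model is a plain nn.Module, not a PreTrainedModel, so it never gets a config attribute. A minimal, untested sketch of the kind of workaround I have in mind (my own assumption, not something from the tutorial) is to expose the wrapped LayoutLM's config on the wrapper:

    # Untested sketch: `model` is the trained wrapper from the notebook.
    # Reuse the inner LayoutLM's config so that `model.config` exists.
    model.config = model.layoutlm.config

    onnx_config = LayoutLMOnnxConfig(model.config)  # should no longer raise AttributeError

Even if this gets past the AttributeError, I am not sure the export itself can succeed, because the ONNX config only describes the standard LayoutLM inputs and knows nothing about resized_images or resized_and_aligned_bounding_boxes.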

Please help, @NielsRogge.

Who can help?

@NielsRogge @SaulLu

Information

  • [ ] The official example scripts
  • [X] My own modified scripts

Tasks

  • [ ] An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
  • [X] My own task or dataset (give details below)

Reproduction

Step 1. Run this notebook: https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Add_image_embeddings_to_LayoutLM.ipynb?authuser=4#scrollTo=Vr4sG80hu6rC

Step 2. Run the model conversion code:

    from pathlib import Path
    from transformers.models.layoutlm import LayoutLMOnnxConfig
    from transformers.onnx import export

    def save_onnx(save_path):
        onnx_config = LayoutLMOnnxConfig(model.config)
        export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path), opset=11)

print("Save model as ONNX") save_onnx('/content/data/model/model.onnx')

I have also tried the following, but the output is blank:

    def save_onnx(save_path):
        configuration = LayoutLMConfig()
        onnx_config = LayoutLMOnnxConfig(configuration)
        export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path), opset=11)
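
As a diagnostic (an untested idea on my side, not a fix for the image embeddings), exporting only the wrapped base model should at least exercise the supported export path, since self.layoutlm is a regular LayoutLMModel with its own config. Of course this drops the image branch entirely:

    from pathlib import Path
    from transformers.models.layoutlm import LayoutLMOnnxConfig
    from transformers.onnx import export

    # Untested sketch: export just the text-only LayoutLM inside the wrapper,
    # deliberately ignoring the backbone / RoIAlign / projection layers.
    # The output file name is only an example.
    base = model.layoutlm.cpu()
    onnx_config = LayoutLMOnnxConfig(base.config)
    export(preprocessor=tokenizer, model=base, config=onnx_config,
           output=Path('/content/data/model/layoutlm_text_only.onnx'), opset=11)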

Please let me know if you need anything else.

Expected behavior

The converted ONNX model is produced in the specified directory.

blueprintparadise, Aug 09 '22 05:08