onnx-coreml icon indicating copy to clipboard operation
onnx-coreml copied to clipboard

_add_conv_like_op with convolution on 3d tensors produces ambiguous names for expanded inputs and outputs

Open richard-vogl opened this issue 5 years ago • 1 comments

🐞Describe the bug

When converting the attached model, Xcode complains about duplicate output names during validation:

validator error: Layer '27_ip_expand' produces an output named '27_expanded' which is also an output produced by the layer '28_expand'.

Trace

not applicable

To Reproduce

  • If a python script can reproduce the error, please paste the code snippet
import torch
import torch.nn as nn
import torch.nn.functional as torch_func
import onnx_coreml
import os
import onnx
import numpy as np


def causal_pad_size(dilation, filter_size):
    """Return the pad length for a filter of the given size and dilation.

    The value is ``dilation * (filter_size - 1)``, truncated to ``int``.
    """
    taps_beyond_first = filter_size - 1
    return int(dilation * taps_beyond_first)


def causal_pad(x, dilation, filter_size, dimension=2):
    """Zero-pad ``x`` on one side of a single axis; no-op for ``filter_size <= 1``.

    A four-entry pad list is passed to ``torch_func.pad``; only the entry at
    index ``(dimension - 1) * 2`` is non-zero and holds
    ``causal_pad_size(dilation, filter_size)``. Per the ``pad`` convention
    (pairs counted from the last axis backwards), ``dimension=1`` pads the
    leading side of the last axis and ``dimension=2`` the leading side of the
    second-to-last axis.
    """
    if not filter_size > 1:
        # Nothing to pad: hand the input back untouched.
        return x

    amount = causal_pad_size(dilation, filter_size)
    pad_spec = [0] * 4
    pad_spec[(dimension - 1) * 2] = amount
    return torch_func.pad(x, pad_spec, mode='constant')


class Net(nn.Module):
    """Small gated-convolution network used to reproduce the converter issue.

    Layout: a 2-D input convolution with batch norm, one gated (tanh * sigmoid)
    1-D convolution stage feeding a skip path, and two 1x1 output convolutions
    followed by a softmax.
    """

    def __init__(self):
        super().__init__()

        self.seq_filter_len = 3
        self.kernel_size = 2

        # Channel widths of the individual stages.
        n_dilation = 2
        n_residual = 4
        n_skip = 8
        n_end = 2
        n_classes = 2

        # Dilation shared by the filter and gate convolutions.
        dilation = 1

        # Input stage: one input channel, kernel spans
        # (kernel_size, seq_filter_len).
        self.start_conv = nn.Conv2d(
            in_channels=1,
            out_channels=n_residual,
            kernel_size=(self.kernel_size, self.seq_filter_len),
            bias=False,
        )
        self.start_bn = nn.BatchNorm2d(n_residual)

        # Gated stage: "filter" and "gate" convolutions read the same input.
        self.filter_conv = nn.Conv1d(
            in_channels=n_residual,
            out_channels=n_dilation,
            dilation=dilation,
            kernel_size=self.kernel_size,
            bias=False,
        )
        self.filter_bn = nn.BatchNorm1d(n_dilation)
        self.gate_conv = nn.Conv1d(
            in_channels=n_residual,
            out_channels=n_dilation,
            dilation=dilation,
            kernel_size=self.kernel_size,
            bias=False,
        )

        # 1x1 projections onto the residual and skip paths.
        self.residual_conv = nn.Conv1d(
            in_channels=n_dilation,
            out_channels=n_residual,
            kernel_size=1,
            bias=False,
        )
        self.skip_conv = nn.Conv1d(
            in_channels=n_dilation,
            out_channels=n_skip,
            kernel_size=1,
            bias=False,
        )

        # Output head: two 1x1 convolutions with bias.
        self.end_conv_1 = nn.Conv1d(
            in_channels=n_skip,
            out_channels=n_end,
            kernel_size=1,
            bias=True,
        )
        self.end_conv_2 = nn.Conv1d(
            in_channels=n_end,
            out_channels=n_classes,
            kernel_size=1,
            bias=True,
        )

        self.loss_fun = torch.nn.BCELoss()

    def forward(self, x):
        """Run the network and return a softmax taken over dimension 2.

        NOTE(review): ``x`` is presumably (batch, time, features) with the
        feature size equal to ``seq_filter_len`` (``main`` feeds a (1, 2, 3)
        tensor) - confirm before reusing with other shapes.
        """
        skip_total = torch.zeros(1).to(x.device)

        # Insert a channel axis so the 2-D input convolution can be applied.
        hidden = x.unsqueeze(1)
        hidden = causal_pad(hidden, 1, self.kernel_size, 2)
        hidden = torch.tanh(self.start_bn(self.start_conv(hidden)))

        # Drop the feature axis (expected to have size 1 at this point).
        hidden = hidden.squeeze(dim=3)

        residual = hidden

        # Gated activation: tanh(filter) * sigmoid(gate).
        padded = causal_pad(hidden, 1, self.kernel_size, 1)
        filtered = torch.tanh(self.filter_bn(self.filter_conv(padded)))
        gated = torch.sigmoid(self.gate_conv(padded))
        hidden = torch.mul(filtered, gated)

        skip_total = torch.add(skip_total, self.skip_conv(hidden))

        # The residual branch is evaluated, but its result is overwritten by
        # the skip path right below; it is kept to match the reported model.
        hidden = self.residual_conv(hidden)
        hidden = torch.add(hidden, residual)

        hidden = torch_func.relu(skip_total)
        hidden = torch_func.relu(self.end_conv_1(hidden))

        logits = self.end_conv_2(hidden).transpose(1, 2)
        return torch.softmax(logits, 2)

    def loss(self, out, target):
        """Return the binary cross-entropy between ``out`` and ``target``."""
        return self.loss_fun(out, target)


# fix for https://github.com/onnx/onnx-coreml/issues/498
def _convert_pad(builder, node, graph, err):
    '''
    Convert an ONNX ``Pad`` node to a CoreML Padding / ConstantPadding Layer:
    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4397
    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1822

    Only constant padding is handled. The ``mode`` attribute may arrive as
    ``bytes`` or ``str``; when it is absent, constant mode is assumed.

    :param builder: object providing ``add_constant_pad`` (the CoreML network builder)
    :param node: node exposing ``name``, ``inputs``, ``outputs`` and ``attrs``
    :param graph: unused; part of the custom conversion function signature
    :param err: unused; part of the custom conversion function signature
    :raises ValueError: if the padding mode is not ``constant`` or the node
        carries no ``pads`` attribute
    '''
    mode = node.attrs.get('mode', 'constant')
    if isinstance(mode, bytes):
        # Normalise bytes to str so b'constant' and the str default
        # 'constant' are treated the same way.
        mode = mode.decode('utf-8')

    if mode != 'constant':
        raise ValueError(f'mode {mode} is not supported')

    pads = node.attrs.get('pads')
    if pads is None:
        raise ValueError(
            f"Pad node {node.name} has no 'pads' attribute; cannot convert"
        )

    # Interleave the first half of ``pads`` with the second half:
    # [b1, b2, ..., e1, e2, ...] -> [b1, e1, b2, e2, ...].
    pad_amounts = np.reshape(pads, (2, -1)).T.flatten().tolist()

    builder.add_constant_pad(
        name=node.name,
        input_names=node.inputs,
        output_name=node.outputs[0],
        value=node.attrs.get('value', 0.0),
        pad_to_given_output_size_mode=False,
        pad_amounts=pad_amounts,
    )


def main():
    """Export ``Net`` to ONNX, convert it to CoreML and dump the resulting spec.

    Files written to the current working directory: ``model.onnx``,
    ``model.mlmodel`` and ``cml.spec.txt``.
    """
    batch_size = 1
    receptive_field = 2
    input_feat_size = 3

    torch_model = Net()
    torch_model.eval()

    torch_x = torch.randn(batch_size, receptive_field, input_feat_size, requires_grad=True)
    # Run one forward pass before exporting so a broken model fails here.
    torch_model(torch_x)

    onnx_file_name = 'model.onnx'
    input_names = ['input']
    output_names = ['output']
    # Export the model
    torch.onnx.export(torch_model,  # model being run
                      torch_x,  # model input (or a tuple for multiple inputs)
                      onnx_file_name,  # where to save the model (can be a file or file-like object)
                      export_params=True,  # store the trained parameter weights inside the model file
                      # opset_version=10,  # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=input_names,  # the model's input names
                      output_names=output_names,  # the model's output names
                      # dynamic_axes={'input': {0: 'batch_size'},  # variable length axes
                      #               'output': {0: 'batch_size'}}
                      )

    onnx_model = onnx.load(onnx_file_name)

    coreml_file_name = 'model.mlmodel'
    # The custom Pad converter replaces the built-in one (workaround for
    # https://github.com/onnx/onnx-coreml/issues/498).
    cml = onnx_coreml.convert(onnx_model,
                              target_ios='13',
                              custom_conversion_functions={"Pad": _convert_pad})
    print(type(cml))
    cml.save(coreml_file_name)
    print('saved coreml model...')

    # Use a context manager so the spec dump is flushed and closed
    # deterministically instead of leaking the file handle.
    with open('cml.spec.txt', "w") as spec_file:
        spec_file.write(str(cml.get_spec()))


# Run the reproduction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


# _add_conv_like_op with rank-3 inputs generates duplicate output names when such ops are chained in series, which makes model validation fail in Xcode

System environment (please complete the following information):

  • coremltools version : 3.0
  • onnx-coreml version: 1.0
  • OS (e.g., MacOS, Linux): MacOS
  • macOS version (if applicable): 10.13.6 (17G65)
  • How you install python (anaconda, virtualenv, system): virtualenv
  • python version (e.g. 3.7): 3.6
  • any other relevant information:

Additional context

Proposed fix: use the node name instead of the original input/output names to build the expanded input and output names:

Index: venv/lib/python3.6/site-packages/onnx_coreml/_operators_nd.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- venv/lib/python3.6/site-packages/onnx_coreml/_operators_nd.py	(date 1571925857000)
+++ venv/lib/python3.6/site-packages/onnx_coreml/_operators_nd.py	(date 1571925857000)
@@ -46,16 +46,17 @@
         add_func(node.inputs, node.outputs, params_dict=params_dict, builder=builder, node=node, graph=graph, err=err)
     elif rank == 3:
         axes = [0, 3]
+        expanded_input_name = node.name+'_ip_expanded'
         # Make 5d tensor
         builder.add_expand_dims(
             name=node.name+'_ip_expand',
             input_name=node.inputs[0],
-            output_name=node.inputs[0]+'_expanded',
+            output_name=expanded_input_name,
             axes=axes
         )
-        node.inputs[0] = node.inputs[0] + '_expanded'
+        node.inputs[0] = expanded_input_name
         output_name = node.outputs[0]
-        node.outputs[0] = output_name + '_expanded'
+        node.outputs[0] = node.name + '_op_expanded'
         # Add conversion op
         get_params_func(builder, node, graph, err, params_dict, axis='width')
         add_func(node.inputs, node.outputs, params_dict=params_dict, builder=builder, node=node, graph=graph, err=err)


richard-vogl avatar Oct 24 '19 14:10 richard-vogl

The reproduction code is also affected by

https://github.com/onnx/onnx-coreml/issues/498

Therefore, the workaround for that issue (the custom `_convert_pad` conversion function) is included in the script.

richard-vogl avatar Oct 24 '19 14:10 richard-vogl