
🐛 [Bug] conv2d of sum results in "Expected orig_dims.nbDims > 2 to be true but got false. Unable to create convolution layer from node"

Open · styler00dollar opened this issue 3 years ago • 4 comments

To Reproduce

# https://github.com/bilibili/ailab/blob/main/Real-CUGAN/VapourSynth/upcunet_v3_vs.py
import torch
import torch_tensorrt
from torch import nn
from torch.nn import functional as F
from tqdm import tqdm

class UNet1(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UNet1, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, 3, 1, 0),
            nn.Conv2d(32, 64, 3, 1, 0),
        )
        self.conv1_down = nn.Conv2d(64, 64, 2, 2, 0)
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 0),
            nn.Conv2d(128, 64, 3, 1, 0),
        )
        self.conv2_up = nn.ConvTranspose2d(64, 64, 2, 2, 0)
        self.conv3 = nn.Conv2d(64, 64, 3, 1, 0)

    def forward(self, x):
        x1 = self.conv1(x)
        
        x2 = self.conv1_down(x1)
        x2 = self.conv2(x2)
        x2 = self.conv2_up(x2)

        x1 = F.pad(x1, (-4, -4, -4, -4))
        # error happens with adding this line
        x3 = self.conv3(x1 + x2)
        # or
        #x3 = self.conv3(torch.add(x1, x2))

        # works if the addition gets removed
        #x3 = self.conv3(x1)
        return x3

example_data = torch.rand(1,3,100,100).cuda()

unet1 = UNet1(3, 3)
unet1.cuda().eval()
print(unet1(example_data).shape)
unet1 = torch.jit.trace(unet1, [example_data])
#unet1 = torch.jit.script(unet1, [example_data])
model = torch_tensorrt.compile(
    unet1,
    inputs=[example_data],
    enabled_precisions={torch.float},
    truncate_long_and_double=True,
    require_full_compilation=False,
)

for i in tqdm(range(1000)):
  out = model(example_data)
The eager forward pass prints the expected output shape, but compilation then fails:

torch.Size([1, 64, 86, 86])

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-58-4bd42ca1cb1e> in <module>()
     48 unet1 = torch.jit.trace(unet1, [example_data])
     49 #unet1 = torch.jit.script(unet1, [example_data])
---> 50 model = torch_tensorrt.compile(unet1, inputs=[example_data],                 enabled_precisions={torch.float}, truncate_long_and_double=True, require_full_compilation = False)
     51 
     52 for i in tqdm(range(1000)):

1 frames

/usr/local/lib/python3.7/dist-packages/torch_tensorrt/_compile.py in compile(module, ir, inputs, enabled_precisions, **kwargs)
     95             )
     96             ts_mod = torch.jit.script(module)
---> 97         return torch_tensorrt.ts.compile(ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs)
     98     elif target_ir == _IRType.fx:
     99         raise RuntimeError("fx is currently not supported")

/usr/local/lib/python3.7/dist-packages/torch_tensorrt/ts/_compiler.py in compile(module, inputs, device, disable_tf32, sparse_weights, enabled_precisions, refit, debug, strict_types, capability, num_min_timing_iters, num_avg_timing_iters, workspace_size, max_batch_size, calibrator, truncate_long_and_double, require_full_compilation, min_block_size, torch_executed_ops, torch_executed_modules)
    117     }
    118 
--> 119     compiled_cpp_mod = _C.compile_graph(module._c, _parse_compile_spec(spec))
    120     compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod)
    121     return compiled_module

RuntimeError: [Error thrown at core/conversion/converters/impl/conv_deconv.cpp:115] Expected orig_dims.nbDims > 2 to be true but got false
Unable to create convolution layer from node: %32 : Tensor = aten::_convolution(%input, %self.conv3.weight, %self.conv3.bias, %6, %5, %6, %8, %5, %9, %8, %8, %7, %7), scope: __module.conv3 # /usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py:443:0
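
A possible interim workaround is to keep the op that seems to trip the converter in PyTorch via the torch_executed_ops option (visible in the compile signature above), so TensorRT never has to convert it. The sketch below is untested and assumes the negative F.pad, which traces to aten::constant_pad_nd, is what breaks shape propagation into the following convolution; that is a guess at the root cause, not a confirmed one.

# Hedged workaround sketch: run the negative-pad (crop) op in PyTorch
# instead of converting it. Assumes F.pad with negative values traces to
# aten::constant_pad_nd and that this op is the trigger.
model = torch_tensorrt.compile(
    unet1,
    inputs=[example_data],
    enabled_precisions={torch.float},
    truncate_long_and_double=True,
    require_full_compilation=False,   # partial compilation must stay enabled
    torch_executed_ops=["aten::constant_pad_nd"],
)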

It works if the kernel size of the conv1/conv2 layers is reduced to 1; the branch shapes then already match, so the negative F.pad crop can be removed.

import torch
import torch_tensorrt
from torch import nn
from torch.nn import functional as F
from tqdm import tqdm

class UNet1(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UNet1, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, 1, 1, 0),
            nn.Conv2d(32, 64, 1, 1, 0),
        )
        self.conv1_down = nn.Conv2d(64, 64, 2, 2, 0)
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, 1, 1, 0),
            nn.Conv2d(128, 64, 1, 1, 0),
        )
        self.conv2_up = nn.ConvTranspose2d(64, 64, 2, 2, 0)
        self.conv3 = nn.Conv2d(64, 64, 3, 1, 0)

    def forward(self, x):
        x1 = self.conv1(x)
        
        x2 = self.conv1_down(x1)
        x2 = self.conv2(x2)
        x2 = self.conv2_up(x2)
        
        x3 = self.conv3(x1 + x2)
        return x3

example_data = torch.rand(1,3,100,100).cuda()

unet1 = UNet1(3, 3)
unet1.cuda().eval()
print(unet1(example_data).shape)
unet1 = torch.jit.trace(unet1, [example_data])
#unet1 = torch.jit.script(unet1, [example_data])
model = torch_tensorrt.compile(
    unet1,
    inputs=[example_data],
    enabled_precisions={torch.float},
    truncate_long_and_double=True,
    require_full_compilation=False,
)

for i in tqdm(range(1000)):
  out = model(example_data)
This version compiles and runs:

torch.Size([1, 64, 98, 98])

100%|██████████| 1000/1000 [00:00<00:00, 2417.14it/s]
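
An alternative sketch that keeps the original 3x3 kernels: a negative constant pad is just a crop, so the same forward pass can be written with tensor slicing, which traces to aten::slice rather than a pad op and might sidestep the failing conversion path. This is an untested assumption, not a confirmed fix.

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = self.conv1_down(x1)
        x2 = self.conv2(x2)
        x2 = self.conv2_up(x2)
        # Same result as F.pad(x1, (-4, -4, -4, -4)): crop 4 px per side.
        x1 = x1[:, :, 4:-4, 4:-4]
        return self.conv3(x1 + x2)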

Expected behavior

Compilation should not crash, since the eager model runs without any shape error.
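
For reference, the branch shapes in the failing model are consistent in eager mode, so the crash is not a user-side shape mismatch. A standalone check, with the sizes derived from the conv arithmetic of the repro above:

import torch
from torch.nn import functional as F

# With a 1x3x100x100 input:
#   x1: 100 -> 98 -> 96 after the two k=3, p=0 convs
#   x2: 96 -> 48 (conv1_down) -> 46 -> 44 (conv2) -> 88 (conv2_up)
x1 = torch.rand(1, 64, 96, 96)
x2 = torch.rand(1, 64, 88, 88)
x1 = F.pad(x1, (-4, -4, -4, -4))   # negative pad crops: 96 -> 88
print((x1 + x2).shape)             # torch.Size([1, 64, 88, 88]); conv3 then yields 86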

Environment

  • Torch-TensorRT Version: 1.0
  • PyTorch Version: 1.10.2+cu113
  • Python version: 3.7
  • CUDA version: 11.1

styler00dollar · Feb 22 '22 21:02

@peri044 can you take a look at this one?

narendasan · Mar 01 '22 02:03

Hello! I'm experiencing the same issue.

issamemari · Mar 01 '22 09:03

This issue has not seen activity for 90 days. Remove the stale label or comment, or this will be closed in 10 days.

github-actions[bot] · May 31 '22 00:05

This issue has not seen activity for 90 days. Remove the stale label or comment, or this will be closed in 10 days.

github-actions[bot] · Aug 30 '22 00:08

This issue has not seen activity for 90 days. Remove the stale label or comment, or this will be closed in 10 days.

github-actions[bot] · Dec 06 '22 00:12

Bo can check whether this is still an issue.

Christina-Young-NVIDIA · Dec 20 '22 02:12

This issue has not seen activity for 90 days. Remove the stale label or comment, or this will be closed in 10 days.

github-actions[bot] · Mar 21 '23 00:03