iree icon indicating copy to clipboard operation
iree copied to clipboard

[hf-model] error: expected sizes to be non-negative, but got -1

Open pdhirajkumarprasad opened this issue 1 year ago • 1 comments

What happened?

For the given IR


module {
  func.func @main_graph(%arg0: !torch.vtensor<[?,?,?,?],f32>, %arg1: !torch.vtensor<[11,1,1,384],f32>, %arg2: !torch.vtensor<[?,?,?,?],f32>, %arg3:!torch.vtensor<[11,1,100,384],f32>, %arg4: !torch.vtensor<[?,?,?],f32>) -> !torch.vtensor<[11,1,?,384],f32>  attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 21 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} {
    %136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1.0> : tensor<11x1x1x384xf32>} : () -> !torch.vtensor<[11,1,1,384],f32> 
    %137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1.0> : tensor<11x1x100x384xf32>} : () -> !torch.vtensor<[11,1,100,384],f32> 
    %138 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1.0> : tensor<11x384x32x54xf32>} : () -> !torch.vtensor<[11,384,32,54],f32> 
    %139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> 
    %none = torch.constant.none
    %219 = torch.operator "onnx.Shape"(%arg0) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__1> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %221 = torch.operator "onnx.Gather"(%219, %220) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %223 = torch.operator "onnx.Shape"(%arg0) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__3> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %225 = torch.operator "onnx.Gather"(%223, %224) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__23> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %271 = torch.operator "onnx.Div"(%221, %270) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %274 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__24> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %275 = torch.operator "onnx.Div"(%225, %274) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__27> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__28> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %287 = torch.operator "onnx.Concat"(%283, %285) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[2],si64> 
    %302 = torch.operator "onnx.Cast"(%287) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[2],si64>) -> !torch.vtensor<[2],si64> 
    %303 = torch.operator "onnx.Concat"(%139, %302) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[2],si64>) -> !torch.vtensor<[4],si64> 
    %304 = torch.operator "onnx.Resize"(%138, %none, %none, %303) {torch.onnx.coordinate_transformation_mode = "half_pixel", torch.onnx.cubic_coeff_a = -7.500000e-01 : f32, torch.onnx.mode = "cubic", torch.onnx.nearest_mode = "floor"} : (!torch.vtensor<[11,384,32,54],f32>, !torch.none, !torch.none, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],f32> 
    %305 = torch.operator "onnx.Shape"(%304) : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %306 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__33> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__34> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__35> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %309 = torch.operator "onnx.Slice"(%305, %307, %308, %306) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[2],si64> 
    %310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__36> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %311 = torch.operator "onnx.Concat"(%309, %310) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> 
    %312 = torch.operator "onnx.Reshape"(%304, %311) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],f32> 
    %313 = torch.operator "onnx.Transpose"(%312) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64]} : (!torch.vtensor<[?,?,?],f32>) -> !torch.vtensor<[?,?,?],f32> 
    %314 = torch.operator "onnx.Mul"(%271, %275) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %315 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__37> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__38> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__39> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %318 = torch.operator "onnx.Unsqueeze"(%314, %317) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %319 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__40> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %320 = torch.operator "onnx.Concat"(%315, %316, %318, %319) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> 
    %321 = torch.operator "onnx.Reshape"(%313, %320) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?],f32>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],f32> 
    %322 = torch.operator "onnx.Concat"(%136, %321, %137) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[11,1,1,384],f32>, !torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[11,1,100,384],f32>) -> !torch.vtensor<[11,1,?,384],f32> 
    return %322: !torch.vtensor<[11,1,?,384],f32>
  }
}

{-#
  dialect_resources: {
    builtin: {
      __1: "0x080000000200000000000000",
      __3: "0x080000000300000000000000",
      __23: "0x080000001000000000000000",
      __24: "0x080000001000000000000000",
      __27: "0x080000000000000000000000",
      __28: "0x080000000000000000000000",
      __33: "0x080000000000000000000000",
      __34: "0x080000000000000000000000",
      __35: "0x080000000200000000000000",
      __36: "0x08000000FFFFFFFFFFFFFFFF",
      __37: "0x080000000B00000000000000",
      __38: "0x080000000100000000000000",
      __39: "0x080000000000000000000000",
      __40: "0x080000008001000000000000"
    }
  }
#-}

getting error as

error: expected sizes to be non-negative, but got -1
    %322 = torch.operator "onnx.Concat"(%136, %321, %137) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[11,1,1,384],f32>, !torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[11,1,100,384],f32>) -> !torch.vtensor<[11,1,?,384],f32>

during iree-flow-canonicalization post CSE.

command:

iree-compile tt.mlir --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-cpu=host -o abc.vmfb

IREE version: IREE compiler version 3.1.0rc20241217 @ 362b554894c46021d32749bf01c9c4410f8cbbc4

model: From HF top 1000 most downloaded models(hf_yolos-small-finetuned-license-plate-detection)

dump with '--mlir-print-ir-after-all --mlir-print-ir-before-all --mlir-disable-threading --mlir-elide-elementsattrs-if-larger=4'

dump.log

Steps to reproduce your issue

  1. Go to '...'
  2. Click on '....'
  3. Scroll down to '....'
  4. See error

What component(s) does this issue relate to?

Compiler

Version information

No response

Additional context

No response

pdhirajkumarprasad avatar Dec 17 '24 13:12 pdhirajkumarprasad

TLDR: the input IR is invalid.

%304 = torch.operator "onnx.Resize"(%138, %none, %none, %303) {torch.onnx.coordinate_transformation_mode = "half_pixel", torch.onnx.cubic_coeff_a = -7.500000e-01 : f32, torch.onnx.mode = "cubic", torch.onnx.nearest_mode = "floor"} : (!torch.vtensor<[11,384,32,54],f32>, !torch.none, !torch.none, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],f32>

becomes:

%28 = torch.vtensor.literal(dense<0> : tensor<1xsi64>) : !torch.vtensor<[1],si64>
  %29 = torch.vtensor.literal(dense<0> : tensor<1xsi64>) : !torch.vtensor<[1],si64>
  %30 = torch.prim.ListConstruct %28, %29 : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.list<vtensor>
  %int0_4 = torch.constant.int 0
  %31 = torch.aten.cat %30, %int0_4 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2],si64>
  %int4 = torch.constant.int 4
  %none_5 = torch.constant.none
  %false = torch.constant.bool false
  %32 = torch.aten.to.dtype %31, %int4, %false, %false, %none_5 : !torch.vtensor<[2],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2],si64>
  %33 = torch.prim.ListConstruct %3, %32 : (!torch.vtensor<[2],si64>, !torch.vtensor<[2],si64>) -> !torch.list<vtensor>
  %int0_6 = torch.constant.int 0
  %34 = torch.aten.cat %33, %int0_6 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4],si64>
  %none_7 = torch.constant.none
  %false_8 = torch.constant.bool false
  %true = torch.constant.bool true
  %str = torch.constant.str "cubic"
  %int0_9 = torch.constant.int 0
  %int2 = torch.constant.int 2
  %35 = torch.aten.select.int %34, %int0_9, %int2 : !torch.vtensor<[4],si64>, !torch.int, !torch.int -> !torch.vtensor<[1],si64>
  %36 = torch.aten.item %35 : !torch.vtensor<[1],si64> -> !torch.int
  %int3 = torch.constant.int 3
  %37 = torch.aten.select.int %34, %int0_9, %int3 : !torch.vtensor<[4],si64>, !torch.int, !torch.int -> !torch.vtensor<[1],si64>
  %38 = torch.aten.item %37 : !torch.vtensor<[1],si64> -> !torch.int
  %39 = torch.prim.ListConstruct %36, %38 : (!torch.int, !torch.int) -> !torch.list<int>
  %40 = torch.aten.__interpolate.size_list_scale_list %2, %39, %none_7, %str, %false_8, %none_7, %false_8 : !torch.vtensor<[11,384,32,54],f32>, !torch.list<int>, !torch.none, !torch.str, !torch.bool, !torch.none, !torch.bool -> !torch.vtensor<[?,?,?,?],f32>

which simplifies to:

 %22 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
 %23 = torch.shape.calculate {
    %100 = torch.aten.__interpolate.size_list_scale_list %13, %22, %none, %str_3, %false, %none, %false : !torch.vtensor<[11,384,32,54],f32>, !torch.list<int>, !torch.none, !torch.str, !torch.bool, !torch.none, !torch.bool -> !torch.vtensor<[11,384,0,0],f32>
    torch.shape.calculate.yield %100 : !torch.vtensor<[11,384,0,0],f32>
  } shapes {
    %100 = torch.prim.ListConstruct %int11, %int384, %int0, %int0 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    torch.shape.calculate.yield.shapes %100 : !torch.list<int>
  } : !torch.vtensor<[11,384,0,0],f32>

The arguments to interpolate are invalid: %22 here is a list of (0,0), resulting in an invalid shape with 0s in dimensions 2 and 3.

It is also semantically incorrect in PyTorch:

>>> a = torch.ones([11,384,32,54])
>>> b = torch.nn.functional.interpolate(a, (0,0), None, "bicubic", False, None, False)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/dan/shark-ai/.env/lib/python3.12/site-packages/torch/nn/functional.py", line 4594, in interpolate
    return torch._C._nn.upsample_bicubic2d(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Input and output sizes should be greater than 0, but got input (H: 32, W: 54) output (H: 0, W: 0)
>>> 

This means either the ONNX model export or the lowering in the `convert-torch-onnx-to-torch` pass is incorrect.

dan-garvey avatar Jun 19 '25 23:06 dan-garvey

I am looking into it.

HalfBloodPrince010 avatar Jul 19 '25 05:07 HalfBloodPrince010

This reproducer is likely not sufficient to narrow down the issue. It looks more like an issue with shape inference, and not knowing the source for %arg0 may be problematic.

I'd recommend starting with the full model to debug this issue.

zjgarvey avatar Aug 19 '25 17:08 zjgarvey

Some context https://discord.com/channels/689900678990135345/1398431144210333818

dan-garvey avatar Aug 19 '25 19:08 dan-garvey