ONNX to MLIR converter
I've stumbled across an ONNX-to-MLIR converter and used it to translate a ResNet18 ONNX model into an MLIR file. I found the guide for how to do this here; a sketch of the invocation is below.
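For reference, the conversion step from the guide boils down to a single call to the converter's model_transform.py script, roughly like this (a sketch only; the preprocessing values match the mean/scale attributes that show up in the top.Input op below, but the flag names and file names may vary by version):
model_transform.py \
    --model_name resnet18 \
    --model_def resnet18.onnx \
    --input_shapes [[1,3,224,224]] \
    --mean 123.675,116.28,103.53 \
    --scale 0.0171,0.0175,0.0174 \
    --pixel_format rgb \
    --mlir resnet18.mlir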
The MLIR file uses a custom "top" dialect, so I'm not sure whether this helps us translate ML models into MLIR files for HEIR. This is the MLIR file I get (I removed the loc definitions at the end):
module @resnet18 attributes {module.FLOPs = 3635523560 : i64, module.chip = "ALL", module.platform = "ONNX", module.state = "TOP_F32", module.weight_file = "resnet18_top_f32_all_weight.npz"} {
func.func @main(%arg0: tensor<1x3x224x224xf32> loc(unknown)) -> tensor<1x1000xf32> {
%0 = "top.Input"(%arg0) {channel_format = "nchw", keep_aspect_ratio = false, keep_ratio_mode = "letterbox", mean = [123.67500305175781, 116.27999877929688, 103.52999877929688], pad_type = "center", pad_value = 0 : i64, pixel_format = "rgb", resize_dims = [256, 256], scale = [0.017100000753998756, 0.017500000074505806, 0.017400000244379044]} : (tensor<1x3x224x224xf32>) -> tensor<1x3x224x224xf32> loc(#loc1)
%1 = "top.Weight"() : () -> tensor<64x3x7x7xf32> loc(#loc2)
%2 = "top.Weight"() : () -> tensor<64xf32> loc(#loc3)
%3 = "top.Conv"(%0, %1, %2) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [7, 7], pads = [3, 3, 3, 3], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x3x224x224xf32>, tensor<64x3x7x7xf32>, tensor<64xf32>) -> tensor<1x64x112x112xf32> loc(#loc4)
%4 = "top.MaxPool"(%3) {count_include_pad = false, do_relu = false, keepdims = true, kernel_shape = [3, 3], pad_value = 0 : i64, pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2]} : (tensor<1x64x112x112xf32>) -> tensor<1x64x56x56xf32> loc(#loc5)
%5 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc6)
%6 = "top.Weight"() : () -> tensor<64xf32> loc(#loc7)
%7 = "top.Conv"(%4, %5, %6) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc8)
%8 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc9)
%9 = "top.Weight"() : () -> tensor<64xf32> loc(#loc10)
%10 = "top.Conv"(%7, %8, %9) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc11)
%11 = "top.Add"(%4, %10) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc12)
%12 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc13)
%13 = "top.Weight"() : () -> tensor<64xf32> loc(#loc14)
%14 = "top.Conv"(%11, %12, %13) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc15)
%15 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc16)
%16 = "top.Weight"() : () -> tensor<64xf32> loc(#loc17)
%17 = "top.Conv"(%14, %15, %16) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc18)
%18 = "top.Add"(%11, %17) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc19)
%19 = "top.Weight"() : () -> tensor<128x64x1x1xf32> loc(#loc20)
%20 = "top.Weight"() : () -> tensor<128xf32> loc(#loc21)
%21 = "top.Conv"(%18, %19, %20) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<128x64x1x1xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc22)
%22 = "top.Weight"() : () -> tensor<128x64x3x3xf32> loc(#loc23)
%23 = "top.Weight"() : () -> tensor<128xf32> loc(#loc24)
%24 = "top.Conv"(%18, %22, %23) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<128x64x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc25)
%25 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc26)
%26 = "top.Weight"() : () -> tensor<128xf32> loc(#loc27)
%27 = "top.Conv"(%24, %25, %26) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc28)
%28 = "top.Add"(%21, %27) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc29)
%29 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc30)
%30 = "top.Weight"() : () -> tensor<128xf32> loc(#loc31)
%31 = "top.Conv"(%28, %29, %30) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc32)
%32 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc33)
%33 = "top.Weight"() : () -> tensor<128xf32> loc(#loc34)
%34 = "top.Conv"(%31, %32, %33) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc35)
%35 = "top.Add"(%28, %34) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc36)
%36 = "top.Weight"() : () -> tensor<256x128x1x1xf32> loc(#loc37)
%37 = "top.Weight"() : () -> tensor<256xf32> loc(#loc38)
%38 = "top.Conv"(%35, %36, %37) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<256x128x1x1xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc39)
%39 = "top.Weight"() : () -> tensor<256x128x3x3xf32> loc(#loc40)
%40 = "top.Weight"() : () -> tensor<256xf32> loc(#loc41)
%41 = "top.Conv"(%35, %39, %40) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<256x128x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc42)
%42 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc43)
%43 = "top.Weight"() : () -> tensor<256xf32> loc(#loc44)
%44 = "top.Conv"(%41, %42, %43) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc45)
%45 = "top.Add"(%38, %44) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc46)
%46 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc47)
%47 = "top.Weight"() : () -> tensor<256xf32> loc(#loc48)
%48 = "top.Conv"(%45, %46, %47) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc49)
%49 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc50)
%50 = "top.Weight"() : () -> tensor<256xf32> loc(#loc51)
%51 = "top.Conv"(%48, %49, %50) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc52)
%52 = "top.Add"(%45, %51) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc53)
%53 = "top.Weight"() : () -> tensor<512x256x1x1xf32> loc(#loc54)
%54 = "top.Weight"() : () -> tensor<512xf32> loc(#loc55)
%55 = "top.Conv"(%52, %53, %54) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<512x256x1x1xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc56)
%56 = "top.Weight"() : () -> tensor<512x256x3x3xf32> loc(#loc57)
%57 = "top.Weight"() : () -> tensor<512xf32> loc(#loc58)
%58 = "top.Conv"(%52, %56, %57) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<512x256x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc59)
%59 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc60)
%60 = "top.Weight"() : () -> tensor<512xf32> loc(#loc61)
%61 = "top.Conv"(%58, %59, %60) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc62)
%62 = "top.Add"(%55, %61) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc63)
%63 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc64)
%64 = "top.Weight"() : () -> tensor<512xf32> loc(#loc65)
%65 = "top.Conv"(%62, %63, %64) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc66)
%66 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc67)
%67 = "top.Weight"() : () -> tensor<512xf32> loc(#loc68)
%68 = "top.Conv"(%65, %66, %67) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc69)
%69 = "top.Add"(%62, %68) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc70)
%70 = "top.AvgPool"(%69) {count_include_pad = true, do_relu = false, keepdims = true, kernel_shape = [7, 7], pad_value = 0 : i64, pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [1, 1]} : (tensor<1x512x7x7xf32>) -> tensor<1x512x1x1xf32> loc(#loc71)
%71 = "top.Reshape"(%70) : (tensor<1x512x1x1xf32>) -> tensor<1x512xf32> loc(#loc72)
%72 = "top.Weight"() : () -> tensor<512x1000xf32> loc(#loc73)
%73 = "top.Weight"() : () -> tensor<1000xf32> loc(#loc74)
%74 = "top.MatMul"(%71, %72, %73) {do_relu = false, hdim_is_batch = false, keep_dims = true, left_transpose = false, output_transpose = false, relu_limit = -1.000000e+00 : f64, right_transpose = false} : (tensor<1x512xf32>, tensor<512x1000xf32>, tensor<1000xf32>) -> tensor<1x1000xf32> loc(#loc75)
return %74 : tensor<1x1000xf32> loc(#loc)
} loc(#loc)
} loc(#loc)
This is the first I've heard of the top dialect... I tried to find a source for it, and I think it's from https://github.com/sophgo/tpu-mlir? Maybe there's a binary in that project that could be used to get this down to linalg, e.g. via the TopToLinalg conversion: https://github.com/sophgo/tpu-mlir/tree/master/include/tpu_mlir/Conversion/TopToLinalg
I've got a container from https://github.com/sophgo/tpu-mlir. There's also a "tpu-opt" binary, which is essentially their version of mlir-opt. But I can only find three conversion passes:
--convert-qdq-to-calibrated-dialect - Convert from qdq model to regular quantized model
--convert-top-to-tosa - Convert top-level Top Ops to Tosa Ops
--convert-top-to-tpu - Convert top-level Top Ops to Tpu Ops
So I'm not sure about the conversion to linalg. The conversion to TOSA worked after some experimentation, though.
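The invocation was roughly this (the pass name is from the list above; the file names are placeholders, and it took some trial and error with additional flags to get it through):
tpu-opt resnet18.mlir --convert-top-to-tosa -o resnet18_tosa.mlir
This is the result: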
module @resnet18 attributes {module.FLOPs = 3635523560 : i64, module.chip = "ALL", module.platform = "ONNX", module.state = "TOSA_F32", module.weight_file = "resnet18_top_f32_all_weight.npz"} {
func.func @main(%arg0: tensor<1x3x224x224xf32> loc(unknown)) -> tensor<1x1000xf32> {
%0 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc1)
%1 = "tosa.const"() <{value = dense<"0x14C3.....
... (remaining tosa.const weight definitions elided; see note at the end) ...
%43 = "tosa.transpose"(%arg0, %0) : (tensor<1x3x224x224xf32>, tensor<4xi32>) -> tensor<1x224x224x3xf32> loc(#loc1)
%44 = "tosa.conv2d"(%43, %1, %2) <{dilation = array<i64: 1, 1>, pad = array<i64: 3, 3, 3, 3>, stride = array<i64: 2, 2>}> : (tensor<1x224x224x3xf32>, tensor<64x7x7x3xf32>, tensor<64xf32>) -> tensor<1x112x112x64xf32> loc(#loc43)
%45 = "tosa.clamp"(%44) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xf32> loc(#loc43)
%46 = "tosa.max_pool2d"(%45) <{kernel = array<i64: 3, 3>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x112x112x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc44)
%47 = "tosa.conv2d"(%46, %3, %4) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc45)
%48 = "tosa.clamp"(%47) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc45)
%49 = "tosa.conv2d"(%48, %5, %6) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc46)
%50 = "tosa.add"(%46, %49) : (tensor<1x56x56x64xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc47)
%51 = "tosa.clamp"(%50) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc47)
%52 = "tosa.conv2d"(%51, %7, %8) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc48)
%53 = "tosa.clamp"(%52) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc48)
%54 = "tosa.conv2d"(%53, %9, %10) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc49)
%55 = "tosa.add"(%51, %54) : (tensor<1x56x56x64xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc50)
%56 = "tosa.clamp"(%55) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc50)
%57 = "tosa.conv2d"(%56, %11, %12) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x56x56x64xf32>, tensor<128x1x1x64xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc51)
%58 = "tosa.conv2d"(%56, %13, %14) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x56x56x64xf32>, tensor<128x3x3x64xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc52)
%59 = "tosa.clamp"(%58) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc52)
%60 = "tosa.conv2d"(%59, %15, %16) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc53)
%61 = "tosa.add"(%57, %60) : (tensor<1x28x28x128xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc54)
%62 = "tosa.clamp"(%61) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc54)
%63 = "tosa.conv2d"(%62, %17, %18) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc55)
%64 = "tosa.clamp"(%63) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc55)
%65 = "tosa.conv2d"(%64, %19, %20) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc56)
%66 = "tosa.add"(%62, %65) : (tensor<1x28x28x128xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc57)
%67 = "tosa.clamp"(%66) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc57)
%68 = "tosa.conv2d"(%67, %21, %22) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x28x28x128xf32>, tensor<256x1x1x128xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc58)
%69 = "tosa.conv2d"(%67, %23, %24) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x28x28x128xf32>, tensor<256x3x3x128xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc59)
%70 = "tosa.clamp"(%69) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc59)
%71 = "tosa.conv2d"(%70, %25, %26) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc60)
%72 = "tosa.add"(%68, %71) : (tensor<1x14x14x256xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc61)
%73 = "tosa.clamp"(%72) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc61)
%74 = "tosa.conv2d"(%73, %27, %28) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc62)
%75 = "tosa.clamp"(%74) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc62)
%76 = "tosa.conv2d"(%75, %29, %30) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc63)
%77 = "tosa.add"(%73, %76) : (tensor<1x14x14x256xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc64)
%78 = "tosa.clamp"(%77) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc64)
%79 = "tosa.conv2d"(%78, %31, %32) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x14x14x256xf32>, tensor<512x1x1x256xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc65)
%80 = "tosa.conv2d"(%78, %33, %34) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x14x14x256xf32>, tensor<512x3x3x256xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc66)
%81 = "tosa.clamp"(%80) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc66)
%82 = "tosa.conv2d"(%81, %35, %36) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc67)
%83 = "tosa.add"(%79, %82) : (tensor<1x7x7x512xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc68)
%84 = "tosa.clamp"(%83) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc68)
%85 = "tosa.conv2d"(%84, %37, %38) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc69)
%86 = "tosa.clamp"(%85) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc69)
%87 = "tosa.conv2d"(%86, %39, %40) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc70)
%88 = "tosa.add"(%84, %87) : (tensor<1x7x7x512xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc71)
%89 = "tosa.clamp"(%88) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc71)
%90 = "tosa.avg_pool2d"(%89) <{acc_type = f32, kernel = array<i64: 7, 7>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>) -> tensor<1x1x1x512xf32> loc(#loc72)
%91 = "tosa.reshape"(%90) <{new_shape = array<i64: 1, 1, 512>}> : (tensor<1x1x1x512xf32>) -> tensor<1x1x512xf32> loc(#loc42)
%92 = "tosa.matmul"(%91, %41) : (tensor<1x1x512xf32>, tensor<1x512x1000xf32>) -> tensor<1x1x1000xf32> loc(#loc42)
%93 = "tosa.reshape"(%92) <{new_shape = array<i64: 1, 1000>}> : (tensor<1x1x1000xf32>) -> tensor<1x1000xf32> loc(#loc42)
%94 = "tosa.add"(%93, %42) : (tensor<1x1000xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> loc(#loc42)
return %94 : tensor<1x1000xf32> loc(#loc)
} loc(#loc)
} loc(#loc)
The file is 89 MB because the weights (kept in the external .npz file at the top level) get inlined as the const operations at the beginning, which I had to elide for posting.
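Regarding the linalg question: once we're in TOSA, upstream mlir-opt should be able to take over. Something along these lines (untested on this exact file; these pass names exist in upstream MLIR, but the pipeline spelling varies across LLVM versions, and tpu-mlir's TOSA ops may not match the upstream TOSA version exactly):
mlir-opt resnet18_tosa.mlir \
    --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" \
    -o resnet18_linalg.mlir
We'd probably also have to strip the tpu-mlir-specific module attributes (module.FLOPs, module.chip, ...) somewhere along the way, since upstream mlir-opt won't know that dialect namespace.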
torch-mlir has a bunch of relevant passes and conversions, and IREE builds an ONNX path on top of them, e.g. https://iree.dev/guides/ml-frameworks/onnx/
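For what it's worth, that route is pretty short per the guide. A sketch (package and tool names are from the guide and may have changed; I haven't run this on the ResNet18 model here):
python -m pip install iree-base-compiler[onnx]   # older docs call the package iree-compiler[onnx]
iree-import-onnx resnet18.onnx -o resnet18_torch.mlir
iree-import-onnx emits the model in torch-mlir's ONNX form; torch-mlir's --convert-torch-onnx-to-torch pass and its linalg backend pipeline should then get it down to linalg.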
Besides going onnx -> IREE, I think onnx-mlir also directly supports converting the onnx dialect to StableHLO, and stablehlo-opt supports lowering to linalg, and we can take it from there:
https://github.com/onnx/onnx-mlir/tree/main/src/Tools/onnx-mlir-opt
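A sketch of that route (the flag and pass names are my best guesses from the onnx-mlir and stablehlo repos; I haven't verified this end to end):
onnx-mlir --EmitStablehloIR resnet18.onnx        # requires an onnx-mlir build with StableHLO support; output file name may differ
stablehlo-opt resnet18.onnx.mlir --stablehlo-legalize-to-linalg -o resnet18_linalg.mlir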