ONNX to MLIR converter
I've stumbled across an ONNX-to-MLIR converter and used it to translate a ResNet18 ONNX model into an MLIR file. I found the guide for how to do this here; a sketch of the invocation is below.
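For reference, the conversion step from the guide boils down to a single call to the converter's model_transform.py script, roughly like this (a sketch only; the preprocessing values match the mean/scale attributes that show up in the top.Input op below, but the flag names and file names may vary by version):
model_transform.py \
    --model_name resnet18 \
    --model_def resnet18.onnx \
    --input_shapes [[1,3,224,224]] \
    --mean 123.675,116.28,103.53 \
    --scale 0.0171,0.0175,0.0174 \
    --pixel_format rgb \
    --mlir resnet18.mlir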
The MLIR file uses a custom "top" dialect, so I'm not sure whether this helps us translate ML models into MLIR files for HEIR. This is the MLIR file I get (I removed the loc definitions at the end):
module @resnet18 attributes {module.FLOPs = 3635523560 : i64, module.chip = "ALL", module.platform = "ONNX", module.state = "TOP_F32", module.weight_file = "resnet18_top_f32_all_weight.npz"} {
func.func @main(%arg0: tensor<1x3x224x224xf32> loc(unknown)) -> tensor<1x1000xf32> {
%0 = "top.Input"(%arg0) {channel_format = "nchw", keep_aspect_ratio = false, keep_ratio_mode = "letterbox", mean = [123.67500305175781, 116.27999877929688, 103.52999877929688], pad_type = "center", pad_value = 0 : i64, pixel_format = "rgb", resize_dims = [256, 256], scale = [0.017100000753998756, 0.017500000074505806, 0.017400000244379044]} : (tensor<1x3x224x224xf32>) -> tensor<1x3x224x224xf32> loc(#loc1)
%1 = "top.Weight"() : () -> tensor<64x3x7x7xf32> loc(#loc2)
%2 = "top.Weight"() : () -> tensor<64xf32> loc(#loc3)
%3 = "top.Conv"(%0, %1, %2) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [7, 7], pads = [3, 3, 3, 3], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x3x224x224xf32>, tensor<64x3x7x7xf32>, tensor<64xf32>) -> tensor<1x64x112x112xf32> loc(#loc4)
%4 = "top.MaxPool"(%3) {count_include_pad = false, do_relu = false, keepdims = true, kernel_shape = [3, 3], pad_value = 0 : i64, pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2]} : (tensor<1x64x112x112xf32>) -> tensor<1x64x56x56xf32> loc(#loc5)
%5 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc6)
%6 = "top.Weight"() : () -> tensor<64xf32> loc(#loc7)
%7 = "top.Conv"(%4, %5, %6) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc8)
%8 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc9)
%9 = "top.Weight"() : () -> tensor<64xf32> loc(#loc10)
%10 = "top.Conv"(%7, %8, %9) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc11)
%11 = "top.Add"(%4, %10) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc12)
%12 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc13)
%13 = "top.Weight"() : () -> tensor<64xf32> loc(#loc14)
%14 = "top.Conv"(%11, %12, %13) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc15)
%15 = "top.Weight"() : () -> tensor<64x64x3x3xf32> loc(#loc16)
%16 = "top.Weight"() : () -> tensor<64xf32> loc(#loc17)
%17 = "top.Conv"(%14, %15, %16) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>) -> tensor<1x64x56x56xf32> loc(#loc18)
%18 = "top.Add"(%11, %17) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc19)
%19 = "top.Weight"() : () -> tensor<128x64x1x1xf32> loc(#loc20)
%20 = "top.Weight"() : () -> tensor<128xf32> loc(#loc21)
%21 = "top.Conv"(%18, %19, %20) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<128x64x1x1xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc22)
%22 = "top.Weight"() : () -> tensor<128x64x3x3xf32> loc(#loc23)
%23 = "top.Weight"() : () -> tensor<128xf32> loc(#loc24)
%24 = "top.Conv"(%18, %22, %23) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x64x56x56xf32>, tensor<128x64x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc25)
%25 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc26)
%26 = "top.Weight"() : () -> tensor<128xf32> loc(#loc27)
%27 = "top.Conv"(%24, %25, %26) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc28)
%28 = "top.Add"(%21, %27) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc29)
%29 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc30)
%30 = "top.Weight"() : () -> tensor<128xf32> loc(#loc31)
%31 = "top.Conv"(%28, %29, %30) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc32)
%32 = "top.Weight"() : () -> tensor<128x128x3x3xf32> loc(#loc33)
%33 = "top.Weight"() : () -> tensor<128xf32> loc(#loc34)
%34 = "top.Conv"(%31, %32, %33) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>) -> tensor<1x128x28x28xf32> loc(#loc35)
%35 = "top.Add"(%28, %34) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc36)
%36 = "top.Weight"() : () -> tensor<256x128x1x1xf32> loc(#loc37)
%37 = "top.Weight"() : () -> tensor<256xf32> loc(#loc38)
%38 = "top.Conv"(%35, %36, %37) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<256x128x1x1xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc39)
%39 = "top.Weight"() : () -> tensor<256x128x3x3xf32> loc(#loc40)
%40 = "top.Weight"() : () -> tensor<256xf32> loc(#loc41)
%41 = "top.Conv"(%35, %39, %40) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x128x28x28xf32>, tensor<256x128x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc42)
%42 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc43)
%43 = "top.Weight"() : () -> tensor<256xf32> loc(#loc44)
%44 = "top.Conv"(%41, %42, %43) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc45)
%45 = "top.Add"(%38, %44) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc46)
%46 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc47)
%47 = "top.Weight"() : () -> tensor<256xf32> loc(#loc48)
%48 = "top.Conv"(%45, %46, %47) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc49)
%49 = "top.Weight"() : () -> tensor<256x256x3x3xf32> loc(#loc50)
%50 = "top.Weight"() : () -> tensor<256xf32> loc(#loc51)
%51 = "top.Conv"(%48, %49, %50) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>) -> tensor<1x256x14x14xf32> loc(#loc52)
%52 = "top.Add"(%45, %51) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc53)
%53 = "top.Weight"() : () -> tensor<512x256x1x1xf32> loc(#loc54)
%54 = "top.Weight"() : () -> tensor<512xf32> loc(#loc55)
%55 = "top.Conv"(%52, %53, %54) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [1, 1], pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<512x256x1x1xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc56)
%56 = "top.Weight"() : () -> tensor<512x256x3x3xf32> loc(#loc57)
%57 = "top.Weight"() : () -> tensor<512xf32> loc(#loc58)
%58 = "top.Conv"(%52, %56, %57) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [2, 2], weight_is_coeff = 1 : i64} : (tensor<1x256x14x14xf32>, tensor<512x256x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc59)
%59 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc60)
%60 = "top.Weight"() : () -> tensor<512xf32> loc(#loc61)
%61 = "top.Conv"(%58, %59, %60) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc62)
%62 = "top.Add"(%55, %61) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc63)
%63 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc64)
%64 = "top.Weight"() : () -> tensor<512xf32> loc(#loc65)
%65 = "top.Conv"(%62, %63, %64) {dilations = [1, 1], do_relu = true, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc66)
%66 = "top.Weight"() : () -> tensor<512x512x3x3xf32> loc(#loc67)
%67 = "top.Weight"() : () -> tensor<512xf32> loc(#loc68)
%68 = "top.Conv"(%65, %66, %67) {dilations = [1, 1], do_relu = false, dynweight_reorderd = false, group = 1 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], relu_limit = -1.000000e+00 : f64, strides = [1, 1], weight_is_coeff = 1 : i64} : (tensor<1x512x7x7xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>) -> tensor<1x512x7x7xf32> loc(#loc69)
%69 = "top.Add"(%62, %68) {do_relu = true, relu_limit = -1.000000e+00 : f64} : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc70)
%70 = "top.AvgPool"(%69) {count_include_pad = true, do_relu = false, keepdims = true, kernel_shape = [7, 7], pad_value = 0 : i64, pads = [0, 0, 0, 0], relu_limit = -1.000000e+00 : f64, strides = [1, 1]} : (tensor<1x512x7x7xf32>) -> tensor<1x512x1x1xf32> loc(#loc71)
%71 = "top.Reshape"(%70) : (tensor<1x512x1x1xf32>) -> tensor<1x512xf32> loc(#loc72)
%72 = "top.Weight"() : () -> tensor<512x1000xf32> loc(#loc73)
%73 = "top.Weight"() : () -> tensor<1000xf32> loc(#loc74)
%74 = "top.MatMul"(%71, %72, %73) {do_relu = false, hdim_is_batch = false, keep_dims = true, left_transpose = false, output_transpose = false, relu_limit = -1.000000e+00 : f64, right_transpose = false} : (tensor<1x512xf32>, tensor<512x1000xf32>, tensor<1000xf32>) -> tensor<1x1000xf32> loc(#loc75)
return %74 : tensor<1x1000xf32> loc(#loc)
} loc(#loc)
} loc(#loc)
This is the first I've heard of the top dialect... I tried to find a source for it, and I think it's from https://github.com/sophgo/tpu-mlir? Maybe there's a binary in that project that could be used to get this down to linalg, e.g. via the TopToLinalg conversion: https://github.com/sophgo/tpu-mlir/tree/master/include/tpu_mlir/Conversion/TopToLinalg
I've got a container from https://github.com/sophgo/tpu-mlir. There's also a "tpu-opt" binary, which is essentially their version of mlir-opt. But I can only find three conversion passes:
--convert-qdq-to-calibrated-dialect - Convert from qdq model to regular quantized model
--convert-top-to-tosa - Convert top-level Top Ops to Tosa Ops
--convert-top-to-tpu - Convert top-level Top Ops to Tpu Ops
So I'm not sure about the conversion to linalg. The conversion to TOSA worked after some experimentation, though.
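The invocation was roughly this (the pass name is from the list above; the file names are placeholders, and it took some trial and error with additional flags to get it through):
tpu-opt resnet18.mlir --convert-top-to-tosa -o resnet18_tosa.mlir
This is the result: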
module @resnet18 attributes {module.FLOPs = 3635523560 : i64, module.chip = "ALL", module.platform = "ONNX", module.state = "TOSA_F32", module.weight_file = "resnet18_top_f32_all_weight.npz"} {
func.func @main(%arg0: tensor<1x3x224x224xf32> loc(unknown)) -> tensor<1x1000xf32> {
%0 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc1)
%1 = "tosa.const"() <{value = dense<"0x14C3.....
... (remaining tosa.const weight definitions elided; see note at the end) ...
%43 = "tosa.transpose"(%arg0, %0) : (tensor<1x3x224x224xf32>, tensor<4xi32>) -> tensor<1x224x224x3xf32> loc(#loc1)
%44 = "tosa.conv2d"(%43, %1, %2) <{dilation = array<i64: 1, 1>, pad = array<i64: 3, 3, 3, 3>, stride = array<i64: 2, 2>}> : (tensor<1x224x224x3xf32>, tensor<64x7x7x3xf32>, tensor<64xf32>) -> tensor<1x112x112x64xf32> loc(#loc43)
%45 = "tosa.clamp"(%44) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xf32> loc(#loc43)
%46 = "tosa.max_pool2d"(%45) <{kernel = array<i64: 3, 3>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x112x112x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc44)
%47 = "tosa.conv2d"(%46, %3, %4) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc45)
%48 = "tosa.clamp"(%47) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc45)
%49 = "tosa.conv2d"(%48, %5, %6) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc46)
%50 = "tosa.add"(%46, %49) : (tensor<1x56x56x64xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc47)
%51 = "tosa.clamp"(%50) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc47)
%52 = "tosa.conv2d"(%51, %7, %8) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc48)
%53 = "tosa.clamp"(%52) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc48)
%54 = "tosa.conv2d"(%53, %9, %10) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x56x56x64xf32>, tensor<64x3x3x64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32> loc(#loc49)
%55 = "tosa.add"(%51, %54) : (tensor<1x56x56x64xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc50)
%56 = "tosa.clamp"(%55) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc50)
%57 = "tosa.conv2d"(%56, %11, %12) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x56x56x64xf32>, tensor<128x1x1x64xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc51)
%58 = "tosa.conv2d"(%56, %13, %14) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x56x56x64xf32>, tensor<128x3x3x64xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc52)
%59 = "tosa.clamp"(%58) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc52)
%60 = "tosa.conv2d"(%59, %15, %16) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc53)
%61 = "tosa.add"(%57, %60) : (tensor<1x28x28x128xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc54)
%62 = "tosa.clamp"(%61) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc54)
%63 = "tosa.conv2d"(%62, %17, %18) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc55)
%64 = "tosa.clamp"(%63) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc55)
%65 = "tosa.conv2d"(%64, %19, %20) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x28x28x128xf32>, tensor<128x3x3x128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32> loc(#loc56)
%66 = "tosa.add"(%62, %65) : (tensor<1x28x28x128xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc57)
%67 = "tosa.clamp"(%66) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc57)
%68 = "tosa.conv2d"(%67, %21, %22) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x28x28x128xf32>, tensor<256x1x1x128xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc58)
%69 = "tosa.conv2d"(%67, %23, %24) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x28x28x128xf32>, tensor<256x3x3x128xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc59)
%70 = "tosa.clamp"(%69) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc59)
%71 = "tosa.conv2d"(%70, %25, %26) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc60)
%72 = "tosa.add"(%68, %71) : (tensor<1x14x14x256xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc61)
%73 = "tosa.clamp"(%72) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc61)
%74 = "tosa.conv2d"(%73, %27, %28) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc62)
%75 = "tosa.clamp"(%74) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc62)
%76 = "tosa.conv2d"(%75, %29, %30) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x14x14x256xf32>, tensor<256x3x3x256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32> loc(#loc63)
%77 = "tosa.add"(%73, %76) : (tensor<1x14x14x256xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc64)
%78 = "tosa.clamp"(%77) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc64)
%79 = "tosa.conv2d"(%78, %31, %32) <{dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 2, 2>}> : (tensor<1x14x14x256xf32>, tensor<512x1x1x256xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc65)
%80 = "tosa.conv2d"(%78, %33, %34) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>}> : (tensor<1x14x14x256xf32>, tensor<512x3x3x256xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc66)
%81 = "tosa.clamp"(%80) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc66)
%82 = "tosa.conv2d"(%81, %35, %36) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc67)
%83 = "tosa.add"(%79, %82) : (tensor<1x7x7x512xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc68)
%84 = "tosa.clamp"(%83) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc68)
%85 = "tosa.conv2d"(%84, %37, %38) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc69)
%86 = "tosa.clamp"(%85) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc69)
%87 = "tosa.conv2d"(%86, %39, %40) <{dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>, tensor<512x3x3x512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32> loc(#loc70)
%88 = "tosa.add"(%84, %87) : (tensor<1x7x7x512xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc71)
%89 = "tosa.clamp"(%88) <{max_fp = 0x7F800000 : f32, max_int = 0 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64}> : (tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc71)
%90 = "tosa.avg_pool2d"(%89) <{acc_type = f32, kernel = array<i64: 7, 7>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>}> : (tensor<1x7x7x512xf32>) -> tensor<1x1x1x512xf32> loc(#loc72)
%91 = "tosa.reshape"(%90) <{new_shape = array<i64: 1, 1, 512>}> : (tensor<1x1x1x512xf32>) -> tensor<1x1x512xf32> loc(#loc42)
%92 = "tosa.matmul"(%91, %41) : (tensor<1x1x512xf32>, tensor<1x512x1000xf32>) -> tensor<1x1x1000xf32> loc(#loc42)
%93 = "tosa.reshape"(%92) <{new_shape = array<i64: 1, 1000>}> : (tensor<1x1x1000xf32>) -> tensor<1x1000xf32> loc(#loc42)
%94 = "tosa.add"(%93, %42) : (tensor<1x1000xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> loc(#loc42)
return %94 : tensor<1x1000xf32> loc(#loc)
} loc(#loc)
} loc(#loc)
The file is 89 MB because the weights (kept in the external .npz file at the top level) get inlined as the const operations at the beginning, which I had to elide for posting.
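Regarding the linalg question: once we're in TOSA, upstream mlir-opt should be able to take over. Something along these lines (untested on this exact file; these pass names exist in upstream MLIR, but the pipeline spelling varies across LLVM versions, and tpu-mlir's TOSA ops may not match the upstream TOSA version exactly):
mlir-opt resnet18_tosa.mlir \
    --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" \
    -o resnet18_linalg.mlir
We'd probably also have to strip the tpu-mlir-specific module attributes (module.FLOPs, module.chip, ...) somewhere along the way, since upstream mlir-opt won't know that dialect namespace.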
torch-mlir has a bunch of relevant passes and conversions, and IREE builds an ONNX path on top of them, e.g. https://iree.dev/guides/ml-frameworks/onnx/
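For what it's worth, that route is pretty short per the guide. A sketch (package and tool names are from the guide and may have changed; I haven't run this on the ResNet18 model here):
python -m pip install iree-base-compiler[onnx]   # older docs call the package iree-compiler[onnx]
iree-import-onnx resnet18.onnx -o resnet18_torch.mlir
iree-import-onnx emits the model in torch-mlir's ONNX form; torch-mlir's --convert-torch-onnx-to-torch pass and its linalg backend pipeline should then get it down to linalg.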
Besides going onnx -> IREE, I think onnx-mlir also directly supports converting the onnx dialect to StableHLO, and stablehlo-opt supports lowering to linalg, and we can take it from there:
https://github.com/onnx/onnx-mlir/tree/main/src/Tools/onnx-mlir-opt
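A sketch of that route (the flag and pass names are my best guesses from the onnx-mlir and stablehlo repos; I haven't verified this end to end):
onnx-mlir --EmitStablehloIR resnet18.onnx        # requires an onnx-mlir build with StableHLO support; output file name may differ
stablehlo-opt resnet18.onnx.mlir --stablehlo-legalize-to-linalg -o resnet18_linalg.mlir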