Enzyme-JAX icon indicating copy to clipboard operation
Enzyme-JAX copied to clipboard

Failed raising

Open wsmoses opened this issue 7 months ago • 3 comments

I’m getting an error trying to run a very stripped-down version of Oceananigans. This case has a handful of kernels and no fill-halo regions (everything has been commented out). The model does not depend on halos and runs on CPU with --check-bounds=yes when we set halo=(0, 0, 0). However, it does not compile with Reactant with halo=(0, 0, 0). It DOES compile with halo=(1, 1, 1), which is very suspicious. The error for halo=(0, 0, 0) is below:

[ Info: Compiling...
failed to raise func: func.func private @"##call__Z40gpu_compute_hydrostatic_free_surface_Gc_16CompilerMetadataI10StaticSizeI12_32__32__32_E12DynamicCheckvv7NDRangeILi3ES0_I10_2__2__32_ES0_I11_16__16__1_EvvEE11OffsetArrayI7Float64Li3E13CuTracedArrayIS9_Li3ELi1E12_32__32__32_EE21LatitudeLongitudeGridIS9_8Periodic7BoundedSF_28StaticVerticalDiscretizationIS8_IS9_Li1E12StepRangeLenIS9_14TwicePrecisionIS9_ESJ_5Int64EESM_S9_S9_ES9_S9_SM_SM_S9_S9_SM_SM_S8_IS9_Li1ESA_IS9_Li1ELi1E5_32__EESP_SP_SP_S9_S9_vSK_Ev5TupleI3ValILi1EESS_I2_cEvv24DefaultBoundaryConditionI17BoundaryConditionI4FluxvEEvv24PrescribedVelocityFieldsI9ZeroFieldISK_Li3EES12_S12_vEv10NamedTupleI5__c__SR_ISC_EEvS14_I2__SR_IJEEES14_I53__time___last__t___last_stage__t___iteration___stage_SR_I13TracedRNumberIS9_ES9_S9_S19_ISK_ESK_EE15DiscreteForcingIv4funcEE#250$par4"(%arg0: memref<32x32x32xf64, 1>, %arg1: memref<32x32x32xf64, 1>) {
  affine.parallel (%arg2, %arg3) = (0, 0) to (128, 256) {
    %0 = affine.load %arg1[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    %1 = arith.mulf %0, %0 {fastmathFlags = #llvm.fastmath<none>} : f64
    %2 = arith.mulf %0, %1 {fastmathFlags = #llvm.fastmath<none>} : f64
    %3 = arith.subf %0, %2 {fastmathFlags = #llvm.fastmath<none>} : f64
    affine.store %3, %arg0[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
  }
  return
}
failed to raise func: func.func private @"##call__Z28gpu__cache_field_tendencies_16CompilerMetadataI10StaticSizeI12_32__32__32_E12DynamicCheckvv7NDRangeILi3ES0_I10_2__2__32_ES0_I11_16__16__1_EvvEE11OffsetArrayI7Float64Li3E13CuTracedArrayIS9_Li3ELi1E12_32__32__32_EESC_#248$par3"(%arg0: memref<32x32x32xf64, 1>, %arg1: memref<32x32x32xf64, 1>) {
  affine.parallel (%arg2, %arg3) = (0, 0) to (128, 256) {
    %0 = affine.load %arg1[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    affine.store %0, %arg0[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
  }
  return
}
failed to raise func: func.func private @"##call__Z27gpu__ab2_step_tracer_field_16CompilerMetadataI10StaticSizeI12_32__32__32_E12DynamicCheckvv7NDRangeILi3ES0_I10_2__2__32_ES0_I11_16__16__1_EvvEE11OffsetArrayI7Float64Li3E13CuTracedArrayIS9_Li3ELi1E12_32__32__32_EE21LatitudeLongitudeGridIS9_8Periodic7BoundedSF_28StaticVerticalDiscretizationIS8_IS9_Li1E12StepRangeLenIS9_14TwicePrecisionIS9_ESJ_5Int64EESM_S9_S9_ES9_S9_SM_SM_S9_S9_SM_SM_S8_IS9_Li1ESA_IS9_Li1ELi1E5_32__EESP_SP_SP_S9_S9_vSK_ES9_S9_SC_SC_#246$par2"(%arg0: memref<32x32x32xf64, 1>, %arg1: memref<32x32x32xf64, 1>, %arg2: memref<32x32x32xf64, 1>) {
  %cst = arith.constant 0.000000e+00 : f64
  %cst_0 = arith.constant 1.000000e-02 : f64
  affine.parallel (%arg3, %arg4) = (0, 0) to (128, 256) {
    %0 = affine.load %arg1[(%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024, (%arg3 floordiv 2) * 16 + %arg4 floordiv 16 - ((%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024) * 32, (%arg3 * 16 + %arg4 + (%arg4 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    %1 = affine.load %arg2[(%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024, (%arg3 floordiv 2) * 16 + %arg4 floordiv 16 - ((%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024) * 32, (%arg3 * 16 + %arg4 + (%arg4 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    %2 = arith.mulf %1, %cst {fastmathFlags = #llvm.fastmath<none>} : f64
    %3 = arith.subf %0, %2 {fastmathFlags = #llvm.fastmath<none>} : f64
    %4 = affine.load %arg0[(%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024, (%arg3 floordiv 2) * 16 + %arg4 floordiv 16 - ((%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024) * 32, (%arg3 * 16 + %arg4 + (%arg4 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    %5 = arith.mulf %3, %cst_0 {fastmathFlags = #llvm.fastmath<none>} : f64
    %6 = arith.addf %5, %4 {fastmathFlags = #llvm.fastmath<none>} : f64
    affine.store %6, %arg0[(%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024, (%arg3 floordiv 2) * 16 + %arg4 floordiv 16 - ((%arg3 * 16 + %arg4 + (%arg3 floordiv 2) * 480 + (%arg4 floordiv 16) * 16) floordiv 1024) * 32, (%arg3 * 16 + %arg4 + (%arg4 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
  }
  return
}
failed to raise func: func.func private @"##call__Z40gpu_compute_hydrostatic_free_surface_Gc_16CompilerMetadataI10StaticSizeI12_32__32__32_E12DynamicCheckvv7NDRangeILi3ES0_I10_2__2__32_ES0_I11_16__16__1_EvvEE11OffsetArrayI7Float64Li3E13CuTracedArrayIS9_Li3ELi1E12_32__32__32_EE21LatitudeLongitudeGridIS9_8Periodic7BoundedSF_28StaticVerticalDiscretizationIS8_IS9_Li1E12StepRangeLenIS9_14TwicePrecisionIS9_ESJ_5Int64EESM_S9_S9_ES9_S9_SM_SM_S9_S9_SM_SM_S8_IS9_Li1ESA_IS9_Li1ELi1E5_32__EESP_SP_SP_S9_S9_vSK_Ev5TupleI3ValILi1EESS_I2_cEvv24DefaultBoundaryConditionI17BoundaryConditionI4FluxvEEvv24PrescribedVelocityFieldsI9ZeroFieldISK_Li3EES12_S12_vEv10NamedTupleI5__c__SR_ISC_EEvS14_I2__SR_IJEEES14_I53__time___last__t___last_stage__t___iteration___stage_SR_I13TracedRNumberIS9_ES9_S9_S19_ISK_ESK_EE15DiscreteForcingIv4funcEE#244$par1"(%arg0: memref<32x32x32xf64, 1>, %arg1: memref<32x32x32xf64, 1>) {
  affine.parallel (%arg2, %arg3) = (0, 0) to (128, 256) {
    %0 = affine.load %arg1[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
    %1 = arith.mulf %0, %0 {fastmathFlags = #llvm.fastmath<none>} : f64
    %2 = arith.mulf %0, %1 {fastmathFlags = #llvm.fastmath<none>} : f64
    %3 = arith.subf %0, %2 {fastmathFlags = #llvm.fastmath<none>} : f64
    affine.store %3, %arg0[(%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024, (%arg2 floordiv 2) * 16 + %arg3 floordiv 16 - ((%arg2 * 16 + %arg3 + (%arg2 floordiv 2) * 480 + (%arg3 floordiv 16) * 16) floordiv 1024) * 32, (%arg2 * 16 + %arg3 + (%arg3 floordiv 16) * 16) mod 32] : memref<32x32x32xf64, 1>
  }
  return
}
┌ Error: Compilation failed, MLIR module written to /var/folders/pv/2k_ry3f951jghlpbnn_hcqg80000gn/T/reactant_11UwPr/module_000_iN14_post_pm.mlir
└ @ Reactant.MLIR.IR ~/.julia/packages/Reactant/PtgZG/src/mlir/IR/Pass.jl:116
ERROR: LoadError: "failed to run pass manager on module"

wsmoses avatar Apr 03 '25 17:04 wsmoses