ClimaCore.jl icon indicating copy to clipboard operation
ClimaCore.jl copied to clipboard

add back uniform_vertical_levels

Open juliasloan25 opened this issue 5 months ago • 4 comments

#1992 removed the uniform_vertical_levels function, which led to a GPU error downstream in ClimaLand (see error message below). This PR reverts that change and makes a patch release; the revert has been tested from ClimaLand (see run here).

The error came up in ClimaLand in a PR switching the canopy temperature variable from being stepped explicitly to being stepped implicitly. We also step soil water content implicitly, but there has been no error with that case. In the soil-only case, our implicit variable is defined on an ExtrudedFiniteDifferenceSpace. In the new combined soil/canopy case, we have one implicit variable on an ExtrudedFiniteDifferenceSpace, and the other on a SpectralElementSpace2D. Maybe the bug comes from having two variables on two different spaces, or from the SpectralElementSpace2D specifically?

Here is the error (e.g. from this failing run):

ERROR: LoadError: InvalidIRError: compiling MethodInstance for ClimaCoreCUDAExt.multiple_field_solve_kernel!(::ClimaComms.CUDADevice, ::Tuple{…}, ::Tuple{…}, ::Tuple{…}, ::Tuple{…}, ::ClimaCore.Fields.Field{…}, ::ClimaCore.DataLayouts.UniversalSize{…}, ::Val{…}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to _single_field_solve!)
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/ClimaCore/8E0O8/ext/cuda/matrix_fields_multiple_field_solve.jl:70
 [2] generated_single_field_solve!
   @ ~/.julia/packages/ClimaCore/8E0O8/ext/cuda/matrix_fields_multiple_field_solve.jl:56
 [3] multiple_field_solve_kernel!
   @ ~/.julia/packages/ClimaCore/8E0O8/ext/cuda/matrix_fields_multiple_field_solve.jl:95
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:458 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:457 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:103
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:97 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:136
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:115 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:111
 [10] compile
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:103 [inlined]
 [11] #1145
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:254 [inlined]
 [12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:52
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:42
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:253
 [15] actual_compilation(cache::Dict{Any, CUDA.CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:237
 [16] cached_compilation(cache::Dict{Any, CUDA.CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:151
 [17] macro expansion
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:369 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(ClimaCoreCUDAExt.multiple_field_solve_kernel!), tt::Type{Tuple{…}}; kwargs::@Kwargs{always_inline::Bool})
    @ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:364
 [20] cufunction
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:361 [inlined]
 [21] macro expansion
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:112 [inlined]
 [22] threads_via_occupancy(f!::typeof(ClimaCoreCUDAExt.multiple_field_solve_kernel!), args::Tuple{ClimaComms.CUDADevice, Tuple{…}, Tuple{…}, Tuple{…}, Tuple{…}, ClimaCore.Fields.Field{…}, ClimaCore.DataLayouts.UniversalSize{…}, Val{…}})
    @ ClimaCoreCUDAExt ~/.julia/packages/ClimaCore/8E0O8/ext/cuda/cuda_utils.jl:94
 [23] macro expansion
    @ ~/.julia/packages/ClimaCore/8E0O8/ext/cuda/matrix_fields_multiple_field_solve.jl:40 [inlined]
 [24] multiple_field_solve!(::ClimaComms.CUDADevice, cache::ClimaCore.MatrixFields.FieldVectorView{…}, x::ClimaCore.MatrixFields.FieldVectorView{…}, A::ClimaCore.MatrixFields.FieldMatrix{…}, b::ClimaCore.MatrixFields.FieldVectorView{…}, x1::ClimaCore.Fields.Field{…})
    @ ClimaCoreCUDAExt ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [25] multiple_field_solve!
    @ ~/.julia/packages/ClimaCore/8E0O8/src/MatrixFields/multiple_field_solver.jl:8 [inlined]
 [26] macro expansion
    @ ~/.julia/packages/ClimaCore/8E0O8/src/MatrixFields/field_matrix_solver.jl:276 [inlined]
 [27] run_field_matrix_solver!(::ClimaCore.MatrixFields.BlockDiagonalSolve, cache::ClimaCore.MatrixFields.FieldVectorView{…}, x::ClimaCore.MatrixFields.FieldVectorView{…}, A::ClimaCore.MatrixFields.FieldMatrix{…}, b::ClimaCore.MatrixFields.FieldVectorView{…})
    @ ClimaCore.MatrixFields ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [28] macro expansion
    @ ~/.julia/packages/ClimaCore/8E0O8/src/MatrixFields/field_matrix_solver.jl:89 [inlined]
 [29] field_matrix_solve!(solver::ClimaCore.MatrixFields.FieldMatrixSolver{…}, x::ClimaCore.Fields.FieldVector{…}, A::ClimaCore.MatrixFields.FieldMatrix{…}, b::ClimaCore.Fields.FieldVector{…})
    @ ClimaCore.MatrixFields ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [30] ldiv!
    @ ~/ClimaLand.jl/src/shared_utilities/implicit_timestepping.jl:177 [inlined]
 [31] macro expansion
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/nl_solvers/newtons_method.jl:605 [inlined]
 [32] solve_newton!(alg::ClimaTimeSteppers.NewtonsMethod{…}, cache::@NamedTuple{…}, x::ClimaCore.Fields.FieldVector{…}, f!::ClimaTimeSteppers.var"#99#103"{…}, j!::ClimaTimeSteppers.var"#100#104"{…}, post_implicit!::ClimaTimeSteppers.var"#101#105"{…}, post_implicit_last!::ClimaTimeSteppers.var"#102#106"{…})
    @ ClimaTimeSteppers ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [33] update_stage!
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/solvers/imex_ark.jl:148 [inlined]
 [34] update_stage!
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/solvers/imex_ark.jl:93 [inlined]
 [35] update_stage!
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/solvers/imex_ark.jl:96 [inlined]
 [36] step_u!(integrator::ClimaTimeSteppers.DistributedODEIntegrator{…}, cache::ClimaTimeSteppers.IMEXARKCache{…})
    @ ClimaTimeSteppers ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/solvers/imex_ark.jl:71
 [37] macro expansion
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/integrators.jl:241 [inlined]
 [38] step_u!(integrator::ClimaTimeSteppers.DistributedODEIntegrator{…})
    @ ClimaTimeSteppers ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [39] __step!(integrator::ClimaTimeSteppers.DistributedODEIntegrator{…})
    @ ClimaTimeSteppers ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/integrators.jl:213
 [40] macro expansion
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/integrators.jl:169 [inlined]
 [41] solve!(integrator::ClimaTimeSteppers.DistributedODEIntegrator{…})
    @ ClimaTimeSteppers ~/.julia/packages/NVTX/pfSOQ/src/macro.jl:194
 [42] __solve(::SciMLBase.ODEProblem{…}, ::ClimaTimeSteppers.IMEXAlgorithm{…}; kwargs::@Kwargs{…})
    @ ClimaTimeSteppers ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/integrators.jl:163
 [43] __solve
    @ ~/.julia/packages/ClimaTimeSteppers/Wc2TE/src/integrators.jl:161 [inlined]
 [44] solve_call(_prob::SciMLBase.ODEProblem{…}, args::ClimaTimeSteppers.IMEXAlgorithm{…}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::@Kwargs{…})
    @ DiffEqBase ~/.julia/packages/DiffEqBase/c8MAQ/src/solve.jl:612
 [45] solve_call
    @ DiffEqBase ~/.julia/packages/DiffEqBase/c8MAQ/src/solve.jl:569 [inlined]
 [46] #solve_up#53
    @ DiffEqBase ~/.julia/packages/DiffEqBase/c8MAQ/src/solve.jl:1080 [inlined]
 [47] solve_up
    @ DiffEqBase ~/.julia/packages/DiffEqBase/c8MAQ/src/solve.jl:1066 [inlined]
 [48] #solve#51
    @ DiffEqBase ~/.julia/packages/DiffEqBase/c8MAQ/src/solve.jl:1003 [inlined]
 [49] setup_and_solve_problem(; greet::Bool)
    @ Main ~/ClimaLand.jl/experiments/long_runs/land_region.jl:659
 [50] top-level scope
    @ ~/ClimaLand.jl/experiments/long_runs/land_region.jl:661
 [51] include(fname::String)
    @ Base.MainInclude ./client.jl:489
in expression starting at /home/jsloan/ClimaLand.jl/experiments/long_runs/land_region.jl:661
  • [x] Code follows the style guidelines OR N/A.
  • [x] Unit tests are included OR N/A.
  • [x] Code is exercised in an integration test OR N/A.
  • [x] Documentation has been added/updated OR N/A.

juliasloan25 avatar Sep 27 '24 22:09 juliasloan25