Enzyme.jl icon indicating copy to clipboard operation
Enzyme.jl copied to clipboard

[Julia 1.11] CUDA support

Open vchuravy opened this issue 7 months ago • 3 comments

Before tackling the big complicated things in https://github.com/EnzymeAD/Enzyme.jl/issues/2365, here is a minimal reproducer:

using Enzyme
using CUDA

# Primal kernel of the reproducer: squares the first element of `A` in place
# (`A[1] = A[1] * A[1]`) and returns `nothing`.
function f(A)
  A[1] *= A[1]
  return nothing
end

# One-element array created with `CUDA.ones`, used as the kernel argument.
A = CUDA.ones(1)

# Launch the primal kernel `f` on `A` via `@cuda`. The stack trace further down
# points at the `df` launch, not at this one.
@cuda f(A)

# Kernel that differentiates `f` from inside device code: calls
# `autodiff_deferred` in `Reverse` mode with `f` wrapped in `Const`, a bare
# `Const` argument (presumably the return activity — confirm against the Enzyme
# docs), and `A` paired with `dA` through `Duplicated`. Returns `nothing`.
# NOTE(review): presumably `dA` is the shadow that receives the gradient with
# respect to `A` — confirm against the Enzyme docs.
function df(A, dA)
  autodiff_deferred(Reverse, Const(f), Const, Duplicated(A, dA))
  return nothing
end

# One-element array created with `CUDA.ones`, passed as the second argument of `df`.
dA = CUDA.ones(1)

# Launch `df` via `@cuda`. Per the stack trace below (`cufunction(f::typeof(df), ...)`),
# compiling this kernel is what raises the AssertionError.
@cuda df(A, dA)

Fails with:

ERROR: LoadError: AssertionError: actualRetType != Union{}
Stacktrace:
  [1] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/src/Enzyme/src/compiler.jl:4226
  [2] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams})
    @ Enzyme.Compiler ~/src/Enzyme/src/compiler.jl:3455
  [3] (::GPUCompiler.var"#157#161"{GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}})()
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:226
  [4] get!(default::GPUCompiler.var"#157#161"{GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}}, h::Dict{GPUCompiler.CompilerJob, String}, key::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams})
    @ Base ./dict.jl:458
  [5] macro expansion
    @ ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:224 [inlined]
  [6] emit_llvm(job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/utils.jl:116
  [7] emit_llvm(job::GPUCompiler.CompilerJob)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/utils.jl:114
  [8] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:95
  [9] compile_unhooked
    @ ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:80 [inlined]
 [10] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:67
 [11] compile
    @ ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:55 [inlined]
 [12] #1181
    @ ~/.julia/packages/CUDA/LhtzZ/src/compiler/compilation.jl:250 [inlined]
 [13] JuliaContext(f::CUDA.var"#1181#1184"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:34
 [14] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/driver.jl:25
 [15] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/LhtzZ/src/compiler/compilation.jl:249
 [16] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/execution.jl:245
 [17] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/3QaEh/src/execution.jl:159
 [18] macro expansion
    @ ~/.julia/packages/CUDA/LhtzZ/src/compiler/execution.jl:373 [inlined]
 [19] macro expansion
    @ ./lock.jl:273 [inlined]
 [20] cufunction(f::typeof(df), tt::Type{Tuple{CuDeviceVector{Float32, 1}, CuDeviceVector{Float32, 1}}}; kwargs::@Kwargs{})
    @ CUDA ~/.julia/packages/CUDA/LhtzZ/src/compiler/execution.jl:368
 [21] cufunction(f::typeof(df), tt::Type{Tuple{CuDeviceVector{Float32, 1}, CuDeviceVector{Float32, 1}}})
    @ CUDA ~/.julia/packages/CUDA/LhtzZ/src/compiler/execution.jl:365
 [22] top-level scope
    @ ~/.julia/packages/CUDA/LhtzZ/src/compiler/execution.jl:112

This makes me think that we are doing something fundamentally wrong with type inference in the case of nested compilation.

vchuravy avatar May 15 '25 20:05 vchuravy

So yeah we are constructing a primal_job that is hella wrong

primal_job = GPUCompiler.CompilerJob{GPUCompiler.NativeCompilerTarget, Enzyme.Compiler.PrimalCompilerParams}(MethodInstance for f(::CuDeviceVector{Float32, 1}), CompilerConfig for GPUCompiler.NativeCompilerTarget, 0x00000000000068a4)

vchuravy avatar May 15 '25 20:05 vchuravy

do we have a parent that's valid?

wsmoses avatar May 15 '25 20:05 wsmoses

This is the announced change from https://github.com/JuliaGPU/GPUCompiler.jl/pull/668#issuecomment-2665477244

We are matching the wrong function and should be looking at the config instead. I will try rewriting all that tomorrow morning.

vchuravy avatar May 15 '25 20:05 vchuravy