Enzyme.jl
Enzyme.jl copied to clipboard
Segmentation fault on Julia 1.11 in abs_typeof
[104143] signal 11 (1): Segmentation fault
in expression starting at /home/vchuravy/src/Enzyme/amdgpu/amdgpu.jl:23
Typeof at ./boot.jl:265 [inlined]
abs_typeof at /home/vchuravy/src/Enzyme/src/absint.jl:351
abs_typeof at /home/vchuravy/src/Enzyme/src/absint.jl:314 [inlined]
check_ir! at /home/vchuravy/src/Enzyme/src/compiler/validation.jl:1036
check_ir! at /home/vchuravy/src/Enzyme/src/compiler/validation.jl:469
check_ir! at /home/vchuravy/src/Enzyme/src/compiler/validation.jl:212
check_ir at /home/vchuravy/src/Enzyme/src/compiler/validation.jl:181 [inlined]
compile_unhooked at /home/vchuravy/src/Enzyme/src/compiler.jl:4060
unknown function (ip: 0x7f058ebe8666)
#compile#153 at /home/vchuravy/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:67
compile at /home/vchuravy/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:55 [inlined]
#91 at /home/vchuravy/src/Enzyme/src/Enzyme.jl:1318
#JuliaContext#152 at /home/vchuravy/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:34
JuliaContext at /home/vchuravy/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:25 [inlined]
tape_type at /home/vchuravy/src/Enzyme/src/Enzyme.jl:1317 [inlined]
#augmented_primal#25 at /home/vchuravy/.julia/packages/AMDGPU/IDGfT/ext/AMDGPUEnzymeCoreExt/AMDGPUEnzymeCoreExt.jl:193
augmented_primal at /home/vchuravy/.julia/packages/AMDGPU/IDGfT/ext/AMDGPUEnzymeCoreExt/AMDGPUEnzymeCoreExt.jl:181 [inlined]
macro expansion at /home/vchuravy/.julia/packages/AMDGPU/IDGfT/src/highlevel.jl:158 [inlined]
square! at /home/vchuravy/src/Enzyme/amdgpu/amdgpu.jl:13 [inlined]
diffejulia_square__33133wrap at /home/vchuravy/src/Enzyme/amdgpu/amdgpu.jl:0
macro expansion at /home/vchuravy/src/Enzyme/src/compiler.jl:5610 [inlined]
enzyme_call at /home/vchuravy/src/Enzyme/src/compiler.jl:5144 [inlined]
CombinedAdjointThunk at /home/vchuravy/src/Enzyme/src/compiler.jl:5019 [inlined]
autodiff at /home/vchuravy/src/Enzyme/src/Enzyme.jl:517 [inlined]
autodiff at /home/vchuravy/src/Enzyme/src/Enzyme.jl:558 [inlined]
autodiff at /home/vchuravy/src/Enzyme/src/Enzyme.jl:530
unknown function (ip: 0x7f058ebc7663)
macro expansion at ./timing.jl:581 [inlined]
top-level scope at /home/vchuravy/src/Enzyme/amdgpu/amdgpu.jl:315
jl_toplevel_eval_flex at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/toplevel.c:934
jl_toplevel_eval_flex at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/toplevel.c:886
ijl_toplevel_eval_in at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/toplevel.c:994
eval at ./boot.jl:430 [inlined]
include_string at ./loading.jl:2734
_include at ./loading.jl:2794
include at ./Base.jl:557
jfptr_include_46879.1 at /home/vchuravy/.julia/juliaup/julia-1.11.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_start at ./client.jl:531
jfptr__start_73430.1 at /home/vchuravy/.julia/juliaup/julia-1.11.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
jl_apply at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/julia.h:2157 [inlined]
true_main at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/jlapi.c:900
jl_repl_entrypoint at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/src/jlapi.c:1059
main at /cache/build/tester-amdci5-12/julialang/julia-release-1-dot-11/cli/loader_exe.c:58
unknown function (ip: 0x7f0602a2a6b4)
__libc_start_main at /usr/lib/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 836325409 (Pool: 836306653; Big: 18756); GC: 173
fish: Job 1, 'julia --project=amdgpu amdgpu/a…' terminated by signal SIGSEGV (Address boundary error)
This was with:
using AMDGPU
using Enzyme
using IntelITT
# GPU kernel: each invocation squares one element of `x` in place and returns nothing.
# NOTE(review): `workitemIdx` and `sync_workgroup` are presumably the AMDGPU.jl device
# intrinsics (work-item index within its workgroup / workgroup-wide barrier) — confirm.
function square_kernel!(x)
# Index of the element this invocation is responsible for.
i = workitemIdx().x
# In-place square; equivalent to x[i] = x[i] * x[i].
x[i] *= x[i]
# Barrier call kept as part of the reproducer; no data is exchanged between invocations here.
sync_workgroup()
return nothing
end
# Basic squaring on GPU: host-side wrapper that launches `square_kernel!` on `x`.
# `x` is modified in place by the kernel; the wrapper itself always returns nothing.
# This is the function handed to `Enzyme.autodiff` in the script.
function square!(x)
# Both gridsize and groupsize are set to length(x).
# NOTE(review): presumably this yields a single workgroup with one work-item per
# element — confirm against AMDGPU.jl's `gridsize`/`groupsize` semantics.
@roc gridsize = length(x) groupsize = length(x) square_kernel!(x)
return nothing
end
# Primal buffer and its shadow (gradient) buffer, 64 elements each, initialised to ones.
A = AMDGPU.ones(64)
dA = AMDGPU.ones(64)
# Timed primal run, wrapped in an IntelITT task labelled "primal".
IntelITT.@task "primal" begin
@time square!(A)
end
# Reset the inputs for the gradient run: A = 1, 2, ..., 64 and a shadow seed of 1 everywhere.
A .= (1:1:64)
dA .= 1
# Timed reverse-mode differentiation of square! with A/dA as a Duplicated argument.
IntelITT.@task "gradient" begin
@time Enzyme.autodiff(Reverse, square!, Duplicated(A, dA))
end
# Expected gradient of x^2 with seed 1 is 2x, i.e. 2, 4, ..., 128.
@assert all(dA .≈ (2:2:128))
Sadly, this uses AMDGPU, so debugging it with rr is a no-go...
Depressingly enough, on 1.11 when it doesn't crash, it calculates the wrong thing.
# 377.761956 seconds (794.20 M allocations: 37.793 GiB, 2.14% gc time, 99.78% compilation time)
# dA = Float32[10.0, 48.0, 144.0, 320.0, 600.0, 1008.0, 1568.0, 2304.0, 266004.0, 444800.0, 709060.0, 1.086336f6, 1.609556f6, 2.317504f6, 3.2553f6, 4.47488f6, 20808.0, 24624.0, 28880.0, 33600.0, 38808.0, 44528.0, 50784.0, 57600.0, 4.0692496f7, 4.942912f7, 5.960595f7, 7.139418f7, 8.4978f7, 1.005552f8, 1.18337544f8, 1.385513f8, 581856.0, 636140.0, 693700.0, 754632.0, 819032.0, 886996.0, 958620.0, 1.034f6, 585316.0, 628320.0, 673380.0, 720544.0, 769860.0, 821376.0, 875140.0, 931200.0, 980000.0, 1.0404f6, 1.103232f6, 1.168544f6, 1.236384f6, 1.3068f6, 1.37984f6, 1.455552f6, 1.533984f6, 1.615184f6, 1.6992f6, 1.78608f6, 1.875872f6, 1.968624f6, 2.064384f6, 2.1632f6]
# [Detaching after vfork from child process 229222]
# [Detaching after vfork from child process 229247]
# ERROR: LoadError: AssertionError: all(dA .≈ 2:2:128)
On 1.10 one runs into https://github.com/JuliaGPU/AMDGPU.jl/issues/804
There's a chance this is fixed by https://github.com/EnzymeAD/Enzyme.jl/pull/2810 (and if not, a small modification to that PR would likely fix it).
@vchuravy is this still erring?