Segfault in Enzyme 0.13
On Enzyme 0.13.8 I am getting a segfault when compiling the following code:
using Enzyme
using TransformVariables
"""
    Model{T,F}

Holds two values of a shared type `T` (`spin` and `θo`) plus a `scene` of type `F`.
`ip` evaluates the scene as `m.scene(p.X, p.Y)`.
"""
struct Model{T,F}
    spin::T
    θo::T
    scene::F
end
"""
    Material{T}

Two parameters of a shared type `T`: `rpeak` and `p1`.

Instances are callable: `prof(X, Y)` returns `(X^2 + Y^2)^(prof.p1 / 2)`.
`rpeak` is stored but does not enter that expression.
"""
struct Material{T}
    rpeak::T
    p1::T
end

(prof::Material)(X, Y) = (X^2 + Y^2)^(prof.p1 / 2)
"""
    Model(θ::NamedTuple)

Build a `Model` from the `spin`, `rpeak` and `p1` entries of `θ`.

`θo` is fixed to `1.0` and the scene is `Material(rpeak, p1)`. Any other entries of
`θ` are not read.
"""
function Model(θ::NamedTuple)
    scene = Material(θ.rpeak, θ.p1)
    return Model(θ.spin, 1.0, scene)
end
"""
    ip(m::Model, p)

Evaluate the model's scene at the point `p`, i.e. `m.scene(p.X, p.Y)`.
"""
ip(m::Model{T}, p) where {T} = m.scene(p.X, p.Y)
# None of these three globals is referenced by the rest of this snippet.
# NOTE(review): the names suggest a pixel count and fields of view — confirm.
npix, fovx, fovy = 40, 15.0, 15.0
"""
    model(θ, meta)

Return `Model(θ)`. The `meta` argument is accepted but not used.
"""
model(θ, meta) = Model(θ)
# Callable wrapper holding a grid of points (`grid`) and a transform (`trf`).
struct Foo{G,T}
grid::G
trf::T
end
# Evaluate the objective at the input vector `x`: transform `x` with `t.trf`
# (also obtaining the log-Jacobian term `lj`), build a model from the transformed
# parameters, evaluate `ip` at every grid entry, and return the sum of the squared
# values plus `lj`.
function (t::Foo)(x)
p, lj = TransformVariables.transform_and_logjac(t.trf, x)
# `Ref` keeps the single model from being iterated; only `t.grid` is broadcast over.
img = ip.(Ref(model(p, nothing)), t.grid)
return sum(abs2, img) + lj
end
# 2×2 matrix of `(X, Y)` named tuples; each coordinate takes the two values -5.0 and 5.0.
g = [(X=x, Y=y) for x in range(-5.0, 5.0, 2), y in range(-5.0, 5.0, 2)]
# Named-tuple transform, each entry built with `as(Real, lo, hi)`. `Model(θ)` reads only
# `spin`, `rpeak` and `p1`, so `θo` and `θs` are transformed but otherwise unused.
trf = as((;spin = as(Real, 0.1, 0.9),
θo = as(Real, 10.0, 50.0),
θs = as(Real, 50.0, 90.0),
rpeak = as(Real, 2.0, 4.0),
p1 = as(Real, 1.0, 3.0))
)
f = Foo(g, trf)
# Plain (primal) evaluation before differentiating.
f(randn(dimension(trf)))
x = randn(dimension(trf))
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Reverse-mode call with runtime activity enabled; this is the call the report is about.
autodiff(set_runtime_activity(Reverse), Const(f), Duplicated(x, dx))
This is a little temperamental, so I wasn't able to reduce it more than this, but I'll try again sometime later this week, hopefully.
One weird thing is that Julia reports the segfault as happening in TransformVariables, in this region of code:
function transform_and_logjac(t::VectorTransform, x::AbstractVector)
@argcheck dimension(t) == length(x)
...
so it is just a dimension check which should be inactive.
Slight reduction
using Enzyme
using TransformVariables
"""
    Material{T}

Two parameters of a shared type `T`: `rpeak` and `p1`.
"""
struct Material{T}
    rpeak::T
    p1::T
end

"""
    ip(m::Material, p)

Return `(p.X^2 + p.Y^2)^m.p1` for a point `p` with `X` and `Y` properties.
"""
ip(m::Material, p) = (p.X^2 + p.Y^2)^m.p1
# Callable wrapper holding a grid of points (`grid`) and a transform (`trf`).
struct Foo{G,T}
grid::G
trf::T
end
# Transform `x` with `t.trf`, build a `Material` from the `rpeak` and `p1` entries of
# the result, and return the sum of `ip(m, point)` over every entry of `t.grid`.
function (t::Foo)(x)
p = TransformVariables.transform(t.trf, x)
# The accumulator starts as a Float64 literal.
s = 0.0
m = Material(p.rpeak, p.p1)
# `@inbounds` disables bounds checks; the indices come from `eachindex(t.grid)`.
@inbounds for i in eachindex(t.grid)
s += ip(m, t.grid[i])
end
return s
end
# 1×1 matrix holding a single grid point at the origin.
g = [(X=0.0, Y=0.0);;]
# `foo` is part of the transform but is not read by `Foo`'s call method, which uses
# only `rpeak` and `p1`.
trf = as((;foo = as(Real, 0.1, 0.9),
rpeak = as(Real, 2.0, 4.0),
p1 = as(Real, 1.0, 3.0))
)
f = Foo(g, trf)
x = randn(dimension(trf))
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Primal evaluation first, then the reverse-mode call.
f(x)
autodiff(Reverse, Const(f), Active, Duplicated(x, dx))
which gives the error
[2944868] signal (11.128): Segmentation fault
in expression starting at /home/ptiede/Research/Test/gcq.jl:40
unknown function (ip: 0x74607eb7d4f8)
Foo at /home/ptiede/Research/Test/gcq.jl:20 [inlined]
Foo at /home/ptiede/Research/Test/gcq.jl:0 [inlined]
diffejulia_Foo_809_inner_1wrap at /home/ptiede/Research/Test/gcq.jl:0
macro expansion at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8839 [inlined]
enzyme_call at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8405 [inlined]
CombinedAdjointThunk at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8178 [inlined]
autodiff at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/Enzyme.jl:491
unknown function (ip: 0x746066108761)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
do_call at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:126
eval_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:223
eval_stmt_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:174 [inlined]
eval_body at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:617
jl_interpret_toplevel_thunk at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:775
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:934
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:877
ijl_toplevel_eval_in at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:985
eval at ./boot.jl:385 [inlined]
include_string at ./loading.jl:2076
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
_include at ./loading.jl:2136
include at ./Base.jl:495
jfptr_include_46447.1 at /home/ptiede/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
exec_options at ./client.jl:318
_start at ./client.jl:552
jfptr__start_82798.1 at /home/ptiede/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
true_main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:582
jl_repl_entrypoint at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:731
main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/cli/loader_exe.c:58
unknown function (ip: 0x74607fc29d8f)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 21395479 (Pool: 21358068; Big: 37411); GC: 33
[1] 2944868 segmentation fault (core dumped) julia --project gcq.jl
@gbaraldi could I nerd-snipe you into investigating this while I continue with the 1.11 stuff?
I just noticed something pretty funky. If I change the definition of ip to
# Variant of `ip` that returns `m.p1` directly. `X` and `Y` are still destructured
# from `p` but are never used afterwards.
function ip(m::Material, p)
(;X, Y) = p
return m.p1
end
I don't get a segfault but the runtime error
ERROR: LoadError: ArgumentError: dimension(t) == length(x) must hold. Got
dimension(t) => 3
length(x) => 3
Stacktrace:
[1] throw_check_error(info::Any)
@ ArgCheck ~/.julia/packages/ArgCheck/CA5vv/src/checks.jl:280
[2] transform
@ ~/.julia/packages/TransformVariables/0OrIV/src/generic.jl:267
[3] Foo
@ ~/Research/Test/gcq.jl:20 [inlined]
[4] Foo
@ ~/Research/Test/gcq.jl:0 [inlined]
[5] diffejulia_Foo_809_inner_1wrap
@ ~/Research/Test/gcq.jl:0
[6] macro expansion
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8842 [inlined]
[7] enzyme_call
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8408 [inlined]
[8] CombinedAdjointThunk
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8181 [inlined]
[9] autodiff(rmode::ReverseMode{false, false, FFIABI, false, false}, f::Const{Foo{Matrix{@NamedTuple{X::Float64, Y::Float64}}, TransformVariables.TransformTuple{@NamedTuple{foo::TransformVariables.ScaledShiftedLogistic{Float64}, rpeak::TransformVariables.ScaledShiftedLogistic{Float64}, p1::TransformVariables.ScaledShiftedLogistic{Float64}}}}}, ::Type{Active}, args::Duplicated{Vector{Float64}})
@ Enzyme ~/.julia/packages/Enzyme/4abVQ/src/Enzyme.jl:491
[10] top-level scope
@ ~/Research/Test/gcq.jl:40
in expression starting at /home/ptiede/Research/Test/gcq.jl:40
This is pretty funny because, as the error message tells you, dimension(t) == length(x)==3.
That might have the same source as the segfault (assuming the primal doesn’t error similarly)
On Tue, Oct 15, 2024 at 10:07 PM Paul Tiede @.***> wrote:
I just noticed something pretty funky. If I change the definition of ip to
function ip(m::Material, p)
    (;X, Y) = p
    return m.p1
end
I don't get a segfault but the runtime error
ERROR: LoadError: ArgumentError: dimension(t) == length(x) must hold. Got
dimension(t) => 3
length(x) => 3
Stacktrace:
[1] throw_check_error(info::Any)
@ ArgCheck ~/.julia/packages/ArgCheck/CA5vv/src/checks.jl:280
[2] transform
@ ~/.julia/packages/TransformVariables/0OrIV/src/generic.jl:267
[3] Foo
@ ~/Research/Test/gcq.jl:20 [inlined]
[4] Foo
@ ~/Research/Test/gcq.jl:0 [inlined]
[5] diffejulia_Foo_809_inner_1wrap
@ ~/Research/Test/gcq.jl:0
[6] macro expansion
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8842 [inlined]
[7] enzyme_call
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8408 [inlined]
[8] CombinedAdjointThunk
@ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8181 [inlined]
[9] autodiff(rmode::ReverseMode{false, false, FFIABI, false, false}, f::Const{Foo{Matrix{@NamedTuple{X::Float64, Y::Float64}}, TransformVariables.TransformTuple{@NamedTuple{foo::TransformVariables.ScaledShiftedLogistic{Float64}, rpeak::TransformVariables.ScaledShiftedLogistic{Float64}, p1::TransformVariables.ScaledShiftedLogistic{Float64}}}}}, ::Type{Active}, args::Duplicated{Vector{Float64}})
@ Enzyme ~/.julia/packages/Enzyme/4abVQ/src/Enzyme.jl:491
[10] top-level scope
@ ~/Research/Test/gcq.jl:40
in expression starting at /home/ptiede/Research/Test/gcq.jl:40
This is pretty funny because, as the error message tells you, dimension(t) == length(x)==3.
— Reply to this email directly, view it on GitHub https://github.com/EnzymeAD/Enzyme.jl/issues/1964#issuecomment-2415630818, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAJTUXBNQGYQNEXL7CZWUC3Z3XJ5VAVCNFSM6AAAAABP6BHPCWVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDIMJVGYZTAOBRHA . You are receiving this because you commented.Message ID: <EnzymeAD/Enzyme. @.***>
The primal is fine, and yeah, I am guessing it's the same culprit.
using Enzyme
# Enzyme debugging switches, all turned on.
# NOTE(review): the names suggest these make Enzyme print everything and dump the IR
# after wrapping, after optimization and before optimization — confirm against the
# Enzyme.jl documentation.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true
"""
    _transform_tuple(x, index, ::Tuple{})

Terminal case for an empty tuple of transformations: return an empty tuple of values
together with `index` unchanged.
"""
_transform_tuple(x, index, ::Tuple{}) = ((), index)
"""
    logistic(x)

Return `exp(x) / (1 + exp(x))`, short-circuited to `zero(x)` when `x < -744` and to
`one(x)` when `x > 36`. `exp(x)` is evaluated before either cutoff is checked.
"""
function logistic(x)
    e = exp(x)
    if x < -744
        return zero(x)
    elseif x > 36
        return one(x)
    else
        return e / (one(x) + e)
    end
end
# Recursive case: compute one value from the first entry of `ts`, recurse on the
# remaining entries with `index + 1`, and prepend the new value to the tuple returned
# by the recursive call. Returns `(values, index)`.
function _transform_tuple(x, index, ts)
# NOTE(review): always reads `x[1]`; `index` is only incremented and passed along,
# never used to index `x` — presumably deliberate in this reduction, confirm.
yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
yrest, index′′ = _transform_tuple(x, index+1, Base.tail(ts))
(yfirst, yrest...), index′′
end
# Entry point marked `@noinline`: runs `_transform_tuple` starting at index 1 and
# returns only the tuple of values; the final index is discarded.
@noinline function mytransform(transformations, x)
y, index′ = _transform_tuple(x, 1, transformations)
y
end
"""
    run(trf, x)

Return the reciprocal of the second component of `mytransform(trf, x)`.
"""
run(trf, x) = 1 / mytransform(trf, x)[2]
# Three constants; each becomes the additive term of one `muladd` in `_transform_tuple`.
trf = (0.1, 2.0, 1.0)
x = randn(3)
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Primal call first, then the reverse-mode call with `trf` constant and `x` duplicated.
run(trf, x)
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))
using Enzyme
# Enzyme debugging switches, all turned on.
# NOTE(review): the names suggest these make Enzyme print everything and dump the IR
# after wrapping, after optimization and before optimization — confirm against the
# Enzyme.jl documentation.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true
"""
    _transform_tuple(x, index, ::Tuple{})

Terminal case for an empty tuple of transformations: return an empty tuple of values
together with `index` unchanged.
"""
_transform_tuple(x, index, ::Tuple{}) = ((), index)
"""
    logistic(x)

Simplified stand-in: return `one(x)` when `x > 36`, otherwise `1 / (1 + 1/x)`.
"""
function logistic(x)
    x > 36 && return one(x)
    return 1 / (one(x) + 1 / x)
end
# Recursive case: compute one value from the first entry of `ts`, recurse on the
# remaining entries with `index + 1`, and prepend the new value to the tuple returned
# by the recursive call. Returns `(values, index)`.
function _transform_tuple(x, index, ts)
# NOTE(review): always reads `x[1]`; `index` is only incremented and passed along,
# never used to index `x` — presumably deliberate in this reduction, confirm.
yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
yrest, index′′ = _transform_tuple(x, index+1, Base.tail(ts))
(yfirst, yrest...), index′′
end
# Entry point marked `@noinline`: runs `_transform_tuple` starting at index 1 and
# returns only the tuple of values; the final index is discarded.
@noinline function mytransform(transformations, x)
y, index′ = _transform_tuple(x, 1, transformations)
y
end
"""
    run(trf, x)

Return the reciprocal of the second component of `mytransform(trf, x)`.
"""
run(trf, x) = 1 / mytransform(trf, x)[2]
# Three constants; each becomes the additive term of one `muladd` in `_transform_tuple`.
trf = (0.1, 2.0, 1.0)
x = randn(3)
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Primal call first, then the reverse-mode call with `trf` constant and `x` duplicated.
run(trf, x)
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))
Last of the segfault reproducers (this one segfaults instead of giving the read-only memory error):
using Enzyme
# Enzyme debugging switches, all turned on.
# NOTE(review): the names suggest these make Enzyme print everything and dump the IR
# after wrapping, after optimization and before optimization — confirm against the
# Enzyme.jl documentation.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true
"""
    _transform_tuple(x, index, ::Tuple{})

Three-argument method for an empty tuple: return an empty tuple of values together
with `index` unchanged. No call in this snippet passes three arguments, so this
method is not reached here.
"""
_transform_tuple(x, index, ::Tuple{}) = ((), index)
"""
    logistic(x)

Simplified stand-in: return `one(x)` when `x > 36`, otherwise `1 / (1 + 1/x)`.
"""
function logistic(x)
    if x > 36
        return one(x)
    end
    return 1 / (one(x) + 1 / x)
end
# Non-recursive, `@noinline` helper: computes a single value from `x[1]` and the first
# entry of `ts`, and returns that value twice as a pair together with the constant 2.
@noinline function _transform_tuple(x, ts)
yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
(yfirst, yfirst), 2
end
# `@noinline` entry point: computes the first value itself, takes the pair returned by
# `_transform_tuple(x, Base.tail(ts))`, and returns all three values as one tuple.
@noinline function mytransform(ts, x)
yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
# The second return value of `_transform_tuple` is discarded.
yrest, _ = _transform_tuple(x, Base.tail(ts))
(yfirst, yrest...)
end
"""
    run(trf, x)

Return the reciprocal of the second component of `mytransform(trf, x)`.
"""
run(trf, x) = 1 / mytransform(trf, x)[2]
# Three constants; `mytransform` uses the first and `_transform_tuple` the second.
trf = (0.1, 2.0, 1.0)
x = randn(3)
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Primal call first, then the reverse-mode call with `trf` constant and `x` duplicated.
run(trf, x)
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))
using Enzyme
# Enzyme debugging switches, all turned on.
# NOTE(review): the names suggest these make Enzyme print everything and dump the IR
# after wrapping, after optimization and before optimization — confirm against the
# Enzyme.jl documentation.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true
"""
    _transform_tuple(x, index, ::Tuple{})

Three-argument method for an empty tuple: return an empty tuple of values together
with `index` unchanged. No call in this snippet passes three arguments, so this
method is not reached here.
"""
_transform_tuple(x, index, ::Tuple{}) = ((), index)
"""
    logistic(x)

Simplified stand-in: return `one(x)` when `x > 36`, otherwise `1 / (1 + 1/x)`.
"""
function logistic(x)
    x > 36 && return one(x)
    denom = one(x) + 1 / x
    return 1 / denom
end
# Non-recursive, `@noinline` helper: applies `logistic` to `x[1]` and returns that value
# twice as a pair together with the constant 2.
@noinline function _transform_tuple(x)
yfirst = logistic(@inbounds x[1])
(yfirst, yfirst), 2
end
# `@noinline` entry point: computes `logistic(x[1])`, takes the pair returned by
# `_transform_tuple(x)`, and returns all three values as one tuple.
# The `ts` argument is accepted but never read.
@noinline function mytransform(ts, x)
yfirst = logistic(@inbounds x[1])
# The second return value of `_transform_tuple` is discarded.
yrest, _ = _transform_tuple(x)
(yfirst, yrest...)
end
"""
    run(trf, x)

Return the reciprocal of the second component of `mytransform(trf, x)`.
"""
run(trf, x) = 1 / mytransform(trf, x)[2]
# Scalar placeholder: it is passed through `run` to `mytransform`, which never reads it.
trf = 0.1
x = randn(3)
# Zero array shaped like `x`, paired with it in `Duplicated` below.
dx = zero(x)
# Primal call first, then the reverse-mode call with `trf` constant and `x` duplicated.
run(trf, x)
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))
This ought to be resolved by https://github.com/EnzymeAD/Enzyme.jl/pull/1990