Enzyme.jl icon indicating copy to clipboard operation
Enzyme.jl copied to clipboard

Segfault in Enzyme 0.13

Open ptiede opened this issue 1 year ago • 10 comments

On Enzyme 0.13.8 I am getting a segfault when compiling the following code:

using Enzyme
using TransformVariables

# Model container: `spin` and `θo` share the element type `T`, while `scene`
# has its own type `F`. `ip` below calls `scene` as a function of two arguments.
struct Model{T,F}
    spin::T
    θo::T
    scene::F
end
# Two same-typed parameters. Only `p1` is read by the call overload defined
# right after this struct; `rpeak` is stored but not used there.
struct Material{T}
    rpeak::T
    p1::T
end
# Makes a `Material` callable: returns (X^2 + Y^2)^(p1/2).
function (prof::Material)(X,Y) 
    return (X^2 + Y^2)^(prof.p1/2) 
end

# Outer constructor from a NamedTuple. Only `spin`, `rpeak` and `p1` are
# destructured (other fields of `θ` are ignored); `θo` is fixed to 1.0 and the
# scene is a `Material(rpeak, p1)`.
function Model(θ::NamedTuple) 
    (;spin,rpeak,p1) = θ
    Model(spin, 1.0, Material(rpeak, p1))
end

# Evaluates the model's `scene` at point `p`, passing `p.X` and `p.Y`.
# The type parameter `T` is bound in the signature but not used in the body.
function ip(m::Model{T}, p) where {T} 
    return m.scene((p.X), (p.Y))
end

# Not referenced by the rest of the snippet shown here (the grid below is built
# from literal ranges) — presumably left over from the unreduced original.
npix = 40
fovx = (15.0)
fovy = (15.0)


# Thin wrapper that builds a `Model` from `θ`; `meta` is accepted but unused.
function model(θ, meta)
    return Model(θ)
end

# Bundles the evaluation grid (`grid`) with the parameter transformation (`trf`)
# so that the call overload below can be differentiated as a single callable.
struct Foo{G,T}
    grid::G
    trf::T
end

# Function being differentiated: maps the flat vector `x` through `t.trf`
# (`lj` is the second value returned by `transform_and_logjac`, presumably the
# log-Jacobian term), builds a `Model` from the transformed parameters,
# evaluates `ip` at every grid point and returns the sum of squares plus `lj`.
function (t::Foo)(x)
    p, lj =  TransformVariables.transform_and_logjac(t.trf, x)
    # `Ref` shields the model from broadcasting, so only `t.grid` is iterated.
    img = ip.(Ref(model(p, nothing)), t.grid)
    return sum(abs2, img) + lj
end

# 2×2 grid of (X, Y) points: `range(-5.0, 5.0, 2)` yields only the two endpoints.
g = [(X=x, Y=y) for x in range(-5.0, 5.0, 2), y in range(-5.0, 5.0, 2)]
# Transformation over five bounded reals. `Model(θ)` only destructures `spin`,
# `rpeak` and `p1`, so the `θo` and `θs` entries are transformed but never read.
trf = as((;spin  = as(Real, 0.1, 0.9), 
           θo    = as(Real, 10.0, 50.0), 
           θs    = as(Real, 50.0, 90.0), 
           rpeak = as(Real, 2.0, 4.0), 
           p1    = as(Real, 1.0, 3.0))
           )

f = Foo(g, trf)
# Primal evaluation on a random input before differentiating.
f(randn(dimension(trf)))

x = randn(dimension(trf))
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Reverse-mode call with runtime activity enabled; per the report, this is the
# code that segfaults on Enzyme 0.13.8.
autodiff(set_runtime_activity(Reverse), Const(f), Duplicated(x, dx))

This is a little temperamental, so I wasn't able to reduce it more than this, but I'll try again sometime later this week, hopefully.

ptiede avatar Oct 15 '24 01:10 ptiede

One weird thing is that Julia reports the segfault as happening inside TransformVariables, in this region of code:

function transform_and_logjac(t::VectorTransform, x::AbstractVector)
    @argcheck dimension(t) == length(x)
   ...

so it is just a dimension check which should be inactive.

ptiede avatar Oct 15 '24 01:10 ptiede

Slight reduction

using Enzyme
using TransformVariables

# Two same-typed parameters; in this reduction `ip` reads only `p1`.
struct Material{T}
    rpeak::T
    p1::T
end

# Returns (X^2 + Y^2)^p1 for the point `p`, which must have `X` and `Y` fields.
function ip(m::Material, p)
    (;X, Y) = p
    return (X^2 + Y^2)^m.p1
end

# Bundles the evaluation grid (`grid`) with the parameter transformation (`trf`).
struct Foo{G,T}
    grid::G
    trf::T
end

# Function being differentiated: transforms `x` with `t.trf`, builds a
# `Material` from the `rpeak` and `p1` entries, and sums `ip` over the grid.
function (t::Foo)(x)
    p =  TransformVariables.transform(t.trf, x)
    # Accumulator starts as a Float64 literal.
    s = 0.0
    m = Material(p.rpeak, p.p1)
    # Indices come from `eachindex(t.grid)`, so bounds checks are skipped here.
    @inbounds for i in eachindex(t.grid)
        s += ip(m, t.grid[i])
    end
    return s
end


# 1×1 matrix holding a single (X, Y) point at the origin.
g = [(X=0.0, Y=0.0);;]
# Three bounded reals; the `foo` entry is transformed but never read by `Foo`'s call overload.
trf = as((;foo  = as(Real, 0.1, 0.9), 
           rpeak = as(Real, 2.0, 4.0), 
           p1    = as(Real, 1.0, 3.0))
        )

f = Foo(g, trf)
x = randn(dimension(trf))
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Primal evaluation before differentiating.
f(x)
# Reverse-mode call with an `Active` return; the thread reports the segfault
# log that follows for this snippet.
autodiff(Reverse, Const(f), Active, Duplicated(x, dx))

which gives the error

[2944868] signal (11.128): Segmentation fault
in expression starting at /home/ptiede/Research/Test/gcq.jl:40
unknown function (ip: 0x74607eb7d4f8)
Foo at /home/ptiede/Research/Test/gcq.jl:20 [inlined]
Foo at /home/ptiede/Research/Test/gcq.jl:0 [inlined]
diffejulia_Foo_809_inner_1wrap at /home/ptiede/Research/Test/gcq.jl:0
macro expansion at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8839 [inlined]
enzyme_call at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8405 [inlined]
CombinedAdjointThunk at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/compiler.jl:8178 [inlined]
autodiff at /home/ptiede/.julia/packages/Enzyme/Vjlrr/src/Enzyme.jl:491
unknown function (ip: 0x746066108761)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
do_call at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:126
eval_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:223
eval_stmt_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:174 [inlined]
eval_body at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:617
jl_interpret_toplevel_thunk at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:775
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:934
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:877
ijl_toplevel_eval_in at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:985
eval at ./boot.jl:385 [inlined]
include_string at ./loading.jl:2076
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
_include at ./loading.jl:2136
include at ./Base.jl:495
jfptr_include_46447.1 at /home/ptiede/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
exec_options at ./client.jl:318
_start at ./client.jl:552
jfptr__start_82798.1 at /home/ptiede/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
true_main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:582
jl_repl_entrypoint at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:731
main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/cli/loader_exe.c:58
unknown function (ip: 0x74607fc29d8f)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 21395479 (Pool: 21358068; Big: 37411); GC: 33
[1]    2944868 segmentation fault (core dumped)  julia --project gcq.jl

ptiede avatar Oct 15 '24 12:10 ptiede

@gbaraldi could I nerdsnipe you into investigating this while I continue with the 1.11 stuff?

wsmoses avatar Oct 15 '24 20:10 wsmoses

I just noticed something pretty funky. If I change the definition of ip to

# Variant of `ip` that ignores the point: `X` and `Y` are still destructured
# from `p` but unused, and only `m.p1` is returned.
function ip(m::Material, p)
    (;X, Y) = p
    return m.p1
end

I don't get a segfault but the runtime error

ERROR: LoadError: ArgumentError: dimension(t) == length(x) must hold. Got
dimension(t) => 3
length(x) => 3
Stacktrace:
  [1] throw_check_error(info::Any)
    @ ArgCheck ~/.julia/packages/ArgCheck/CA5vv/src/checks.jl:280
  [2] transform
    @ ~/.julia/packages/TransformVariables/0OrIV/src/generic.jl:267
  [3] Foo
    @ ~/Research/Test/gcq.jl:20 [inlined]
  [4] Foo
    @ ~/Research/Test/gcq.jl:0 [inlined]
  [5] diffejulia_Foo_809_inner_1wrap
    @ ~/Research/Test/gcq.jl:0
  [6] macro expansion
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8842 [inlined]
  [7] enzyme_call
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8408 [inlined]
  [8] CombinedAdjointThunk
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8181 [inlined]
  [9] autodiff(rmode::ReverseMode{false, false, FFIABI, false, false}, f::Const{Foo{Matrix{@NamedTuple{X::Float64, Y::Float64}}, TransformVariables.TransformTuple{@NamedTuple{foo::TransformVariables.ScaledShiftedLogistic{Float64}, rpeak::TransformVariables.ScaledShiftedLogistic{Float64}, p1::TransformVariables.ScaledShiftedLogistic{Float64}}}}}, ::Type{Active}, args::Duplicated{Vector{Float64}})
    @ Enzyme ~/.julia/packages/Enzyme/4abVQ/src/Enzyme.jl:491
 [10] top-level scope
    @ ~/Research/Test/gcq.jl:40
in expression starting at /home/ptiede/Research/Test/gcq.jl:40

This is pretty funny because, as the error message tells you, dimension(t) == length(x)==3.

ptiede avatar Oct 16 '24 03:10 ptiede

That might have the same source as the segfault (assuming the primal doesn’t error similarly)

On Tue, Oct 15, 2024 at 10:07 PM Paul Tiede @.***> wrote:

I just noticed something pretty funky. If I change the definition of ip to

function ip(m::Material, p)
    (;X, Y) = p
    return m.p1
end

I don't get a segfault but the runtime error

ERROR: LoadError: ArgumentError: dimension(t) == length(x) must hold. Got
dimension(t) => 3
length(x) => 3
Stacktrace:
  [1] throw_check_error(info::Any)
    @ ArgCheck ~/.julia/packages/ArgCheck/CA5vv/src/checks.jl:280
  [2] transform
    @ ~/.julia/packages/TransformVariables/0OrIV/src/generic.jl:267
  [3] Foo
    @ ~/Research/Test/gcq.jl:20 [inlined]
  [4] Foo
    @ ~/Research/Test/gcq.jl:0 [inlined]
  [5] diffejulia_Foo_809_inner_1wrap
    @ ~/Research/Test/gcq.jl:0
  [6] macro expansion
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8842 [inlined]
  [7] enzyme_call
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8408 [inlined]
  [8] CombinedAdjointThunk
    @ ~/.julia/packages/Enzyme/4abVQ/src/compiler.jl:8181 [inlined]
  [9] autodiff(rmode::ReverseMode{false, false, FFIABI, false, false}, f::Const{Foo{Matrix{@NamedTuple{X::Float64, Y::Float64}}, TransformVariables.TransformTuple{@NamedTuple{foo::TransformVariables.ScaledShiftedLogistic{Float64}, rpeak::TransformVariables.ScaledShiftedLogistic{Float64}, p1::TransformVariables.ScaledShiftedLogistic{Float64}}}}}, ::Type{Active}, args::Duplicated{Vector{Float64}})
    @ Enzyme ~/.julia/packages/Enzyme/4abVQ/src/Enzyme.jl:491
 [10] top-level scope
    @ ~/Research/Test/gcq.jl:40
in expression starting at /home/ptiede/Research/Test/gcq.jl:40

This is pretty funny because, as the error message tells you, dimension(t) == length(x)==3.

— Reply to this email directly, view it on GitHub https://github.com/EnzymeAD/Enzyme.jl/issues/1964#issuecomment-2415630818, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAJTUXBNQGYQNEXL7CZWUC3Z3XJ5VAVCNFSM6AAAAABP6BHPCWVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDIMJVGYZTAOBRHA . You are receiving this because you commented.Message ID: <EnzymeAD/Enzyme. @.***>

wsmoses avatar Oct 16 '24 03:10 wsmoses

The primal is fine, and yeah, I am guessing it's the same culprit.

ptiede avatar Oct 16 '24 04:10 ptiede

using Enzyme

# Enable Enzyme's verbose printing and its IR dump switches. Going by the flag
# names these dump the module before optimization, after optimization and after
# wrapping — confirm against the Enzyme version in use.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true


# Recursion base case: no transformations left, so return an empty tuple and
# hand `index` back unchanged.
_transform_tuple(x, index, ::Tuple{}) =
    (), index


# Logistic function with explicit cut-offs: zero(x) for x < -744, one(x) for
# x > 36, otherwise e / (1 + e) with e = exp(x).
function logistic(x)
    # `exp(x)` is computed unconditionally, before the range checks below.
    e = exp(x)
    return x < -744 ? zero(x) : x > 36 ? one(x) : e / (one(x) + e)
end

# Recursive case: produce one value for the first entry of `ts`, recurse on the
# tail, and return the combined tuple together with the final index.
function _transform_tuple(x, index, ts)
    # Always reads `x[1]`: in this reduction `index` is only threaded through
    # the recursion and never used to index `x`.
    yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
    yrest, index′′ = _transform_tuple(x, index+1, Base.tail(ts))
    (yfirst, yrest...), index′′
end


# Entry point of the transform: starts the recursion at index 1 and returns
# only the tuple of values (the final index is discarded). Marked `@noinline`,
# so it remains a separate call from `run`.
@noinline function mytransform(transformations, x)
    y, index′ = _transform_tuple(x, 1, transformations)
    y
end

# Function being differentiated: the reciprocal of the second transformed value.
function run(trf, x)
    p =  mytransform(trf, x)
    return 1/(p[2])
end

# Plain tuple of three floats stands in for the TransformVariables object.
trf = (0.1, 2.0, 1.0)

x = randn(3)
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Primal evaluation before differentiating.
run(trf, x)
# Reverse-mode call under investigation: `trf` is constant, `x` is duplicated.
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))

wsmoses avatar Oct 21 '24 00:10 wsmoses

using Enzyme

# Enable Enzyme's verbose printing and its IR dump switches. Going by the flag
# names these dump the module before optimization, after optimization and after
# wrapping — confirm against the Enzyme version in use.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true


# Recursion base case: no transformations left, so return an empty tuple and
# hand `index` back unchanged.
_transform_tuple(x, index, ::Tuple{}) =
    (), index


# Simplified stand-in that keeps the name `logistic` but no longer calls `exp`:
# returns one(x) for x > 36, otherwise 1 / (1 + 1/x).
function logistic(x)
    return x > 36 ? one(x) : 1 / (one(x) + 1/x)
end

# Recursive case: produce one value for the first entry of `ts`, recurse on the
# tail, and return the combined tuple together with the final index.
function _transform_tuple(x, index, ts)
    # Always reads `x[1]`: `index` is only threaded through the recursion and
    # never used to index `x`.
    yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
    yrest, index′′ = _transform_tuple(x, index+1, Base.tail(ts))
    (yfirst, yrest...), index′′
end


# Entry point of the transform: starts the recursion at index 1 and returns
# only the tuple of values (the final index is discarded). Marked `@noinline`,
# so it remains a separate call from `run`.
@noinline function mytransform(transformations, x)
    y, index′ = _transform_tuple(x, 1, transformations)
    y
end

# Function being differentiated: the reciprocal of the second transformed value.
function run(trf, x)
    p =  mytransform(trf, x)
    return 1/(p[2])
end

# Plain tuple of three floats stands in for the TransformVariables object.
trf = (0.1, 2.0, 1.0)

x = randn(3)
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Primal evaluation before differentiating.
run(trf, x)
# Reverse-mode call under investigation: `trf` is constant, `x` is duplicated.
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))

wsmoses avatar Oct 21 '24 01:10 wsmoses

Last of the reductions that still segfaults (rather than producing a read-only memory error):

using Enzyme

# Enable Enzyme's verbose printing and its IR dump switches. Going by the flag
# names these dump the module before optimization, after optimization and after
# wrapping — confirm against the Enzyme version in use.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true


# Three-argument method for an empty tuple: returns `()` and `index` unchanged.
# The rest of this snippet only calls the two-argument `_transform_tuple`, so
# this method is not reached from the code shown here.
_transform_tuple(x, index, ::Tuple{}) =
    (), index


# Simplified stand-in that keeps the name `logistic` but does not call `exp`:
# returns one(x) for x > 36, otherwise 1 / (1 + 1/x).
function logistic(x)
    return x > 36 ? one(x) : 1 / (one(x) + 1/x)
end

# Non-recursive replacement for the tuple walk: computes a single value from
# `x[1]` and `first(ts)` and returns it twice, paired with the literal index 2.
# Marked `@noinline`, so it remains a separate call from `mytransform`.
@noinline function _transform_tuple(x, ts)
    yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
    (yfirst, yfirst), 2
end


# Computes the first value inline, gets the remaining pair from
# `_transform_tuple` on the tail of `ts` (its index result is discarded), and
# splats everything into one 3-tuple.
@noinline function mytransform(ts, x)
    yfirst = muladd(logistic(@inbounds x[1]), 2.0, first(ts))
    yrest, _ = _transform_tuple(x, Base.tail(ts))
    (yfirst, yrest...)
end

# Function being differentiated: the reciprocal of the second transformed value.
function run(trf, x)
    p =  mytransform(trf, x)
    return 1/(p[2])
end

# Plain tuple of three floats stands in for the TransformVariables object.
trf = (0.1, 2.0, 1.0)

x = randn(3)
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Primal evaluation before differentiating.
run(trf, x)
# Reverse-mode call under investigation: `trf` is constant, `x` is duplicated.
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))

wsmoses avatar Oct 21 '24 01:10 wsmoses

using Enzyme

# Enable Enzyme's verbose printing and its IR dump switches. Going by the flag
# names these dump the module before optimization, after optimization and after
# wrapping — confirm against the Enzyme version in use.
Enzyme.API.printall!(true)
Enzyme.Compiler.DumpPostWrap[] = true
Enzyme.Compiler.DumpPostOpt[] = true
Enzyme.Compiler.DumpPreOpt[] = true


# Three-argument method for an empty tuple: returns `()` and `index` unchanged.
# The rest of this snippet only calls the one-argument `_transform_tuple`, so
# this method is not reached from the code shown here.
_transform_tuple(x, index, ::Tuple{}) =
    (), index


# Simplified stand-in that keeps the name `logistic` but does not call `exp`:
# returns one(x) for x > 36, otherwise 1 / (1 + 1/x).
function logistic(x)
    return x > 36 ? one(x) : 1 / (one(x) + 1/x)
end

# Further-reduced helper with no transformation argument: applies `logistic`
# to `x[1]` and returns that value twice, paired with the literal index 2.
# Marked `@noinline`, so it remains a separate call from `mytransform`.
@noinline function _transform_tuple(x)
    yfirst = logistic(@inbounds x[1])
    (yfirst, yfirst), 2
end


# Builds a 3-tuple from `logistic(x[1])` followed by the pair returned by
# `_transform_tuple(x)` (its index result is discarded). `ts` is accepted but
# not used in this reduction.
@noinline function mytransform(ts, x)
    yfirst = logistic(@inbounds x[1])
    yrest, _ = _transform_tuple(x)
    (yfirst, yrest...)
end

# Function being differentiated: the reciprocal of the second transformed value.
function run(trf, x)
    p =  mytransform(trf, x)
    return 1/(p[2])
end

# `trf` is now a single scalar; `mytransform` no longer reads it.
trf = 0.1

x = randn(3)
# Zero-initialized companion array passed alongside `x` in `Duplicated`.
dx = zero(x)
# Primal evaluation before differentiating.
run(trf, x)
# Reverse-mode call under investigation: `trf` is constant, `x` is duplicated.
autodiff(Reverse, run, Active, Const(trf), Duplicated(x, dx))

wsmoses avatar Oct 21 '24 01:10 wsmoses

This ought to be resolved by https://github.com/EnzymeAD/Enzyme.jl/pull/1990

wsmoses avatar Oct 22 '24 23:10 wsmoses