Enzyme.jl
Enzyme.jl copied to clipboard
Missing invoke handling causes infinite loop
See CI on 1.10 cc @vchuravy @gbaraldi
See https://github.com/EnzymeAD/Enzyme.jl/actions/runs/6786335843/job/18446651673
WARNING: Method definition invsin(Any) in module Main at /home/runner/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:515 overwritten at /home/runner/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:1137.
generic: Error During Test at /home/runner/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:1142
Got exception outside of a @test
StackOverflowError:
Stacktrace:
[1] runtime_generic_augfwd(::Type{Val{(false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}}, ::Val{1}, ::Val{(true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}, ::Val{@NamedTuple{1, 2, 3}}, ::typeof(hvcat), ::Nothing, ::NTuple{5, Int64}, ::Nothing, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64)
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:1433
[2] hvcat
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:1269 [inlined]
[3] hvcat
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:0 [inlined]
[4] augmented_julia_hvcat_41331_inner_1wrap
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:0
[5] macro expansion
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9836 [inlined]
[6] enzyme_call
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9514 [inlined]
[7] AugmentedForwardThunk
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9488 [inlined]
[8] macro expansion
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:1390 [inlined]
--- the last 8 lines are repeated 3876 more times ---
[31017] runtime_generic_augfwd(::Type{Val{(false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}}, ::Val{1}, ::Val{(true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}, ::Val{@NamedTuple{1, 2, 3}}, ::typeof(hvcat), ::Nothing, ::NTuple{5, Int64}, ::Nothing, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64, ::Float64)
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:1433
[31018] hvcat
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:1269 [inlined]
[31019] hvcat
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:0 [inlined]
[31020] augmented_julia_hvcat_41255_inner_1wrap
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:0
[31021] macro expansion
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9836 [inlined]
[31022] enzyme_call
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9514 [inlined]
[31023] AugmentedForwardThunk
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9488 [inlined]
[31024] macro expansion
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:1390 [inlined]
[31025] runtime_generic_augfwd(::Type{Val{(false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, true, false)}}, ::Val{1}, ::Val{(true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}, ::Val{@NamedTuple{1, 2, 3}}, ::typeof(hvcat), ::Nothing, ::NTuple{5, Int64}, ::Nothing, ::Float64, ::Float64, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Float64, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Float64, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Float64, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Nothing, ::Float64, ::Float64, ::Float64, ::Nothing)
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:1433
[31026] hvcat
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/SparseArrays/src/sparsevector.jl:1269 [inlined]
[31027] whocallsmorethan30args
@ ~/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:1170 [inlined]
[31028] diffejulia_whocallsmorethan30args_41148wrap
@ ~/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:0
[31029] macro expansion
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9836 [inlined]
[31030] enzyme_call
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9514 [inlined]
[31031] CombinedAdjointThunk
@ Enzyme.Compiler ~/work/Enzyme.jl/Enzyme.jl/src/compiler.jl:9477 [inlined]
[31032] autodiff(::ReverseMode{false, FFIABI}, f::Const{var"#whocallsmorethan30args#166"}, ::Type{Active}, args::Duplicated{Matrix{Float64}})
@ Enzyme ~/work/Enzyme.jl/Enzyme.jl/src/Enzyme.jl:215
[31033] autodiff(::ReverseMode{false, FFIABI}, ::var"#whocallsmorethan30args#166", ::Type, ::Duplicated{Matrix{Float64}})
@ Enzyme ~/work/Enzyme.jl/Enzyme.jl/src/Enzyme.jl:224
[31034] macro expansion
@ ~/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:1182 [inlined]
[31035] macro expansion
@ /opt/hostedtoolcache/julia/1.10-nightly/x64/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
[31036] top-level scope
@ ~/work/Enzyme.jl/Enzyme.jl/test/runtests.jl:1143
[31037] include(fname::String)
@ Base.MainInclude ./client.jl:489
[31038] top-level scope
@ none:6
[31039] eval
@ Core ./boot.jl:383 [inlined]
[31040] exec_options(opts::Base.JLOptions)
@ Base ./client.jl:291
Test Summary: | Pass Error Total Time
generic | 8 1 9 11.6s
using Enzyme
using LinearAlgebra
# Reproducer: builds a 5x6 matrix literal (30 entries) whose diagonal comes from
# the diagonal of `R`, then returns the sum of all entries.
function whocallsmorethan30args(R)
# `diag` comes from the `using LinearAlgebra` line above.
temp = diag(R)
# Five rows of six entries each. The stack trace in this report shows the call
# from this function landing in `hvcat`, which is where the recursion starts.
R_inv = [temp[1] 0. 0. 0. 0. 0.;
0. temp[2] 0. 0. 0. 0.;
0. 0. temp[3] 0. 0. 0.;
0. 0. 0. temp[4] 0. 0.;
0. 0. 0. 0. temp[5] 0.;
]
return sum(R_inv)
end
# 6x6 zero-initialised input and a second zero matrix of the same size that is
# paired with it in `Duplicated` (presumably the gradient buffer; confirm
# against the Enzyme documentation).
R = zeros(6,6)
dR = zeros(6, 6)
# Reverse-mode call with an `Active` return annotation; this is the call that
# appears at the bottom of the stack trace in this report.
autodiff(Reverse, whocallsmorethan30args, Active, Duplicated(R, dR))
Hm can't reproduce locally. It was crashing on 1.10-beta, but is passing for me on rc1.
Even trying to directly reconstruct the call:
# Hand-written call to the runtime entry point, mirroring the argument pattern
# of frame [1] in the stack trace: a `Val` type of boolean flags, `Val{1}()`,
# a second `Val` of flags, a `Val` of a NamedTuple type, `hvcat`, `nothing`,
# the row-length tuple, `nothing`, and then 2*30 values of 1.0.
Enzyme.Compiler.runtime_generic_augfwd(
Val{(false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)},
Val{1}(),
Val{(true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true)}(),
Val{NamedTuple{(Symbol("1"), Symbol("2"), Symbol("3")),Tuple{Any,Any,Any}}}(),
hvcat,
nothing,
(6,6,6,6,6), nothing,
ntuple(_->1.0, 2*30)...)
After a short investigation: the code behaves properly outside the test suite. It is not clear why this recurses to begin with.
This reproduces in a file for me:
using Enzyme
using Statistics
Enzyme.API.printall!(true)
# Reduced reproducer: places the scalar `temp` on the diagonal of a 5x6 matrix
# literal (30 entries) and returns the (1,1) entry.
function whocallsmorethan30args(temp)
# Same 5-rows-by-6-columns literal shape as the original failing test.
R_inv = [temp 0. 0. 0. 0. 0.;
0. temp 0. 0. 0. 0.;
0. 0. temp 0. 0. 0.;
0. 0. 0. temp 0. 0.;
0. 0. 0. 0. temp 0.;
]
return R_inv[1,1]
end
# Forward-mode call with a `DuplicatedNoNeed` return annotation; the input is
# `Duplicated(1.0, 1.0)` (value 1.0 and, presumably, a tangent seed of 1.0).
autodiff(Forward, whocallsmorethan30args, DuplicatedNoNeed, Duplicated(1.0,1.0))
I think I know what is happening and why, but I don't immediately have a good fix.
Right now applylatest/applygeneric/invoke all become an applygeneric on runtime_generic_augfwd. However, in 1.10 the first call above becomes a jl_invoke(hvcat, ....) to a method specialized for all args. That jl_invoke becomes a call to autodiff hvcat(all those args). Once again the Julia compiler decides to wrap that inside a jl_invoke, instead of calling the specialized one directly, creating the loop.
In my opinion the best solution is for us to pass the MethodInstance (mi) down to the thunk, but we cannot, since a generated method cannot take a MethodInstance in a Val [which I know concerns you for other reasons, but that's my immediate thought on what blocks this].
This PR does a partial fix but hits a julia issue I don't understand with gpucompiler.jl: https://github.com/EnzymeAD/Enzyme.jl/pull/1169
Specifically the outermost function is this, which has a vararg at the end and has a different julia calling abi:
svec(typeof(hvcat), NTuple{5, Int64}, Float64, Float64, Vararg{Float64})
Could gpucompiler give us one with the usual ABI?
define nonnull {} addrspace(10)* @japi1_hvcat_3138({} addrspace(10)* %function, {} addrspace(10)** noalias nocapture noundef readonly %args, i32 %nargs) #0 !dbg !4 {
top:
%stackargs = alloca {} addrspace(10)**, align 8
store volatile {} addrspace(10)** %args, {} addrspace(10)*** %stackargs, align 8
%0 = call {}*** @julia.get_pgcstack()
%1 = bitcast {}*** %0 to {}**
%current_task = getelementptr inbounds {}*, {}** %1, i64 -14
%2 = bitcast {}** %current_task to i64*
%world_age = getelementptr inbounds i64, i64* %2, i64 15
%3 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %args, i32 0
%4 = load {} addrspace(10)*, {} addrspace(10)** %3, align 8, !tbaa !8, !invariant.load !7, !alias.scope !12, !noalias !15, !nonnull !7, !dereferenceable !20, !align !21
%5 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %args, i64 1
%6 = sub i32 %nargs, 1
%7 = bitcast {}*** %0 to {}**
%current_task1 = getelementptr inbounds {}*, {}** %7, i64 -14
%ptls_field = getelementptr inbounds {}*, {}** %current_task1, i64 16
%ptls_load = load {}*, {}** %ptls_field, align 8, !tbaa !22
%ptls = bitcast {}* %ptls_load to {}**
%8 = bitcast {}** %ptls to i64**
%9 = getelementptr inbounds i64*, i64** %8, i64 2
%safepoint = load i64*, i64** %9, align 8, !tbaa !8, !invariant.load !7
fence syncscope("singlethread") seq_cst
call void @julia.safepoint(i64* %safepoint), !dbg !24
fence syncscope("singlethread") seq_cst
%10 = addrspacecast {} addrspace(10)* %4 to {} addrspace(11)*, !dbg !25
%11 = bitcast {} addrspace(11)* %10 to [5 x i64] addrspace(11)*, !dbg !25
%12 = getelementptr inbounds [5 x i64], [5 x i64] addrspace(11)* %11, i32 0, i32 0, !dbg !25
%unbox = load i64, i64 addrspace(11)* %12, align 8, !dbg !29, !tbaa !33, !alias.scope !37, !noalias !38
%13 = call nonnull {} addrspace(10)* @ijl_alloc_array_2d({} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515882277456 to {}*) to {} addrspace(10)*), i64 5, i64 %unbox), !dbg !29
%14 = addrspacecast {} addrspace(10)* %13 to {} addrspace(11)*, !dbg !39
%15 = bitcast {} addrspace(11)* %14 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !39
%arraylen_ptr = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %15, i32 0, i32 1, !dbg !39
%arraylen = load i64, i64 addrspace(11)* %arraylen_ptr, align 8, !dbg !39, !tbaa !8, !range !43, !invariant.load !7, !alias.scope !12, !noalias !15
%16 = sub i32 %nargs, 1, !dbg !44
%17 = sext i32 %16 to i64, !dbg !44
%18 = icmp eq i64 %arraylen, %17, !dbg !46
%19 = zext i1 %18 to i8, !dbg !46
%20 = trunc i8 %19 to i1, !dbg !52
%21 = xor i1 %20, true, !dbg !52
%22 = zext i1 %21 to i8, !dbg !42
%23 = trunc i8 %22 to i1, !dbg !42
%24 = xor i1 %23, true, !dbg !42
br i1 %24, label %L14, label %L8, !dbg !42
L8: ; preds = %top
%25 = addrspacecast {} addrspace(10)* %13 to {} addrspace(11)*, !dbg !55
%26 = bitcast {} addrspace(11)* %25 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !55
%arraylen_ptr2 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %26, i32 0, i32 1, !dbg !55
%arraylen3 = load i64, i64 addrspace(11)* %arraylen_ptr2, align 8, !dbg !55, !tbaa !8, !range !43, !invariant.load !7, !alias.scope !12, !noalias !15
%27 = sub i32 %nargs, 1, !dbg !57
%28 = sext i32 %27 to i64, !dbg !57
%29 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %arraylen3), !dbg !56
%30 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %28), !dbg !56
%31 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* @ijl_invoke, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515739994608 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515715232560 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605968 to {}*) to {} addrspace(10)*), {} addrspace(10)* %29, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605936 to {}*) to {} addrspace(10)*), {} addrspace(10)* %30, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605904 to {}*) to {} addrspace(10)*)) #6, !dbg !56
%32 = insertvalue [1 x {} addrspace(10)*] zeroinitializer, {} addrspace(10)* %31, 0, !dbg !58
%33 = bitcast {}*** %0 to {}**, !dbg !56
%current_task4 = getelementptr inbounds {}*, {}** %33, i64 -14, !dbg !56
%box = call noalias nonnull dereferenceable(8) {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task4, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515785584480 to {}*) to {} addrspace(10)*)) #7, !dbg !56
%34 = bitcast {} addrspace(10)* %box to [1 x {} addrspace(10)*] addrspace(10)*, !dbg !56
store [1 x {} addrspace(10)*] %32, [1 x {} addrspace(10)*] addrspace(10)* %34, align 8, !dbg !56, !tbaa !33, !alias.scope !37, !noalias !38
%35 = addrspacecast {} addrspace(10)* %box to {} addrspace(12)*, !dbg !56
call void @ijl_throw({} addrspace(12)* %35), !dbg !56
unreachable, !dbg !56
L14: ; preds = %top
br i1 false, label %L72, label %L14.L15_crit_edge, !dbg !60
L14.L15_crit_edge: ; preds = %L14
br label %L15, !dbg !60
L15: ; preds = %L14.L15_crit_edge, %L71
%value_phi = phi i64 [ 1, %L14.L15_crit_edge ], [ %value_phi27, %L71 ]
%value_phi5 = phi i64 [ 1, %L14.L15_crit_edge ], [ %value_phi28, %L71 ]
%value_phi6 = phi i64 [ 1, %L14.L15_crit_edge ], [ %value_phi26, %L71 ]
%36 = sub i64 %value_phi, 1, !dbg !61
%37 = addrspacecast {} addrspace(10)* %4 to {} addrspace(11)*, !dbg !61
%38 = bitcast {} addrspace(11)* %37 to i64 addrspace(11)*, !dbg !61
%39 = getelementptr inbounds i64, i64 addrspace(11)* %38, i64 %36, !dbg !61
%unbox7 = load i64, i64 addrspace(11)* %12, align 8, !dbg !63, !tbaa !33, !alias.scope !37, !noalias !38
%unbox8 = load i64, i64 addrspace(11)* %39, align 8, !dbg !63, !tbaa !33, !alias.scope !37, !noalias !38
%40 = icmp eq i64 %unbox7, %unbox8, !dbg !63
%41 = zext i1 %40 to i8, !dbg !63
%42 = trunc i8 %41 to i1, !dbg !65
%43 = xor i1 %42, true, !dbg !65
%44 = zext i1 %43 to i8, !dbg !62
%45 = trunc i8 %44 to i1, !dbg !62
%46 = xor i1 %45, true, !dbg !62
br i1 %46, label %L27, label %L22, !dbg !62
L22: ; preds = %L15
%47 = sub i64 %value_phi, 1, !dbg !66
%48 = addrspacecast {} addrspace(10)* %4 to {} addrspace(11)*, !dbg !66
%49 = bitcast {} addrspace(11)* %48 to i64 addrspace(11)*, !dbg !66
%50 = getelementptr inbounds i64, i64 addrspace(11)* %49, i64 %47, !dbg !66
%51 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %value_phi), !dbg !67
%unbox9 = load i64, i64 addrspace(11)* %12, align 8, !dbg !67, !tbaa !33, !alias.scope !37, !noalias !38
%52 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %unbox9), !dbg !67
%unbox10 = load i64, i64 addrspace(11)* %50, align 8, !dbg !67, !tbaa !33, !alias.scope !37, !noalias !38
%53 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %unbox10), !dbg !67
%54 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* @ijl_invoke, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515739994608 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515715232560 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605872 to {}*) to {} addrspace(10)*), {} addrspace(10)* %51, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605808 to {}*) to {} addrspace(10)*), {} addrspace(10)* %52, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605936 to {}*) to {} addrspace(10)*), {} addrspace(10)* %53, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515820605904 to {}*) to {} addrspace(10)*)) #6, !dbg !67
%55 = insertvalue [1 x {} addrspace(10)*] zeroinitializer, {} addrspace(10)* %54, 0, !dbg !68
%56 = bitcast {}*** %0 to {}**, !dbg !67
%current_task11 = getelementptr inbounds {}*, {}** %56, i64 -14, !dbg !67
%box12 = call noalias nonnull dereferenceable(8) {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task11, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140515761616736 to {}*) to {} addrspace(10)*)) #7, !dbg !67
%57 = bitcast {} addrspace(10)* %box12 to [1 x {} addrspace(10)*] addrspace(10)*, !dbg !67
store [1 x {} addrspace(10)*] %55, [1 x {} addrspace(10)*] addrspace(10)* %57, align 8, !dbg !67, !tbaa !33, !alias.scope !37, !noalias !38
%58 = addrspacecast {} addrspace(10)* %box12 to {} addrspace(12)*, !dbg !67
call void @ijl_throw({} addrspace(12)* %58), !dbg !67
unreachable, !dbg !67
L27: ; preds = %L15
%unbox15 = load i64, i64 addrspace(11)* %12, align 8, !dbg !71, !tbaa !33, !alias.scope !37, !noalias !38
%59 = icmp sle i64 1, %unbox15, !dbg !71
%60 = zext i1 %59 to i8, !dbg !76
%61 = trunc i8 %60 to i1, !dbg !76
%62 = xor i1 %61, true, !dbg !76
br i1 %62, label %L30, label %L29, !dbg !76
L29: ; preds = %L27
%63 = icmp ne i64 addrspace(11)* %12, null
br i1 %63, label %guard_pass, label %guard_exit
L30: ; preds = %L27
br label %L31, !dbg !76
L31: ; preds = %L30, %guard_exit31
%value_phi16 = phi i64 [ %guard_res33, %guard_exit31 ], [ 0, %L30 ]
br label %L33, !dbg !79
L33: ; preds = %L31
br label %L34, !dbg !81
L34: ; preds = %L33
%64 = icmp slt i64 %value_phi16, 1, !dbg !84
%65 = zext i1 %64 to i8, !dbg !90
%66 = trunc i8 %65 to i1, !dbg !90
%67 = xor i1 %66, true, !dbg !90
br i1 %67, label %L37, label %L36, !dbg !90
L36: ; preds = %L34
br label %L38, !dbg !90
L37: ; preds = %L34
br label %L38, !dbg !90
L38: ; preds = %L37, %L36
%value_phi17 = phi i8 [ 1, %L36 ], [ 0, %L37 ]
%value_phi18 = phi i64 [ 1, %L37 ], [ undef, %L36 ]
%value_phi19 = phi i64 [ 1, %L37 ], [ undef, %L36 ]
%68 = trunc i8 %value_phi17 to i1, !dbg !83
%69 = xor i1 %68, true, !dbg !83
%70 = zext i1 %69 to i8, !dbg !83
%71 = trunc i8 %70 to i1, !dbg !83
%72 = xor i1 %71, true, !dbg !83
br i1 %72, label %L38.L60_crit_edge, label %L38.L43_crit_edge, !dbg !83
L38.L60_crit_edge: ; preds = %L38
br label %L60, !dbg !92
L38.L43_crit_edge: ; preds = %L38
br label %L43, !dbg !90
L43: ; preds = %L38.L43_crit_edge, %L59
%value_phi20 = phi i64 [ %value_phi6, %L38.L43_crit_edge ], [ %93, %L59 ]
%value_phi21 = phi i64 [ %value_phi18, %L38.L43_crit_edge ], [ %value_phi23, %L59 ]
%value_phi22 = phi i64 [ %value_phi19, %L38.L43_crit_edge ], [ %value_phi24, %L59 ]
%73 = sub i32 %nargs, 1, !dbg !94
%74 = sext i32 %73 to i64, !dbg !94
%75 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %args, i64 1, !dbg !94
%76 = sub i64 %value_phi20, 1, !dbg !94
%77 = add i64 %76, 1, !dbg !94
%78 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %args, i64 %77, !dbg !94
%getfield = load {} addrspace(10)*, {} addrspace(10)** %78, align 8, !dbg !94, !tbaa !96, !alias.scope !37, !noalias !38, !nonnull !7, !dereferenceable !21, !align !21
%79 = sub i64 %value_phi, 1, !dbg !97
%80 = mul i64 %79, 1, !dbg !97
%81 = add i64 0, %80, !dbg !97
%82 = addrspacecast {} addrspace(10)* %13 to {} addrspace(11)*, !dbg !97
%83 = bitcast {} addrspace(11)* %82 to {} addrspace(10)* addrspace(11)*, !dbg !97
%arraysize_ptr = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %83, i32 3, !dbg !97
%84 = bitcast {} addrspace(10)* addrspace(11)* %arraysize_ptr to i64 addrspace(11)*, !dbg !97
%arraysize = load i64, i64 addrspace(11)* %84, align 8, !dbg !97, !tbaa !8, !range !43, !invariant.load !7, !alias.scope !12, !noalias !15
%stride = mul i64 1, %arraysize, !dbg !97
%85 = sub i64 %value_phi21, 1, !dbg !97
%86 = mul i64 %85, %stride, !dbg !97
%87 = add i64 %81, %86, !dbg !97
%88 = addrspacecast {} addrspace(10)* %13 to {} addrspace(11)*, !dbg !97
%89 = bitcast {} addrspace(11)* %88 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !97
%arrayptr_ptr = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %89, i32 0, i32 0, !dbg !97
%arrayptr = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %arrayptr_ptr, align 8, !dbg !97, !tbaa !8, !invariant.load !7, !alias.scope !12, !noalias !15, !nonnull !7
%90 = bitcast i8 addrspace(13)* %arrayptr to double addrspace(13)*, !dbg !97
%memcpy_refined_dst = getelementptr inbounds double, double addrspace(13)* %90, i64 %87, !dbg !97
%91 = addrspacecast {} addrspace(10)* %getfield to {} addrspace(11)*, !dbg !97
%memcpy_refined_src = bitcast {} addrspace(11)* %91 to double addrspace(11)*, !dbg !97
%92 = load double, double addrspace(11)* %memcpy_refined_src, align 8, !dbg !97, !tbaa !33, !alias.scope !37, !noalias !38
store double %92, double addrspace(13)* %memcpy_refined_dst, align 8, !dbg !97, !tbaa !99, !alias.scope !37, !noalias !38
%93 = add i64 %value_phi20, 1, !dbg !101
%94 = icmp eq i64 %value_phi22, %value_phi16, !dbg !104
%95 = zext i1 %94 to i8, !dbg !104
%96 = trunc i8 %95 to i1, !dbg !92
%97 = xor i1 %96, true, !dbg !92
br i1 %97, label %L52, label %L51, !dbg !92
L51: ; preds = %L43
br label %L54, !dbg !92
L52: ; preds = %L43
%98 = add i64 %value_phi22, 1, !dbg !105
br label %L54, !dbg !92
L54: ; preds = %L52, %L51
%value_phi23 = phi i64 [ %98, %L52 ], [ undef, %L51 ]
%value_phi24 = phi i64 [ %98, %L52 ], [ undef, %L51 ]
%value_phi25 = phi i8 [ 1, %L51 ], [ 0, %L52 ]
%99 = trunc i8 %value_phi25 to i1, !dbg !93
%100 = xor i1 %99, true, !dbg !93
%101 = zext i1 %100 to i8, !dbg !93
%102 = trunc i8 %101 to i1, !dbg !93
%103 = xor i1 %102, true, !dbg !93
br i1 %103, label %L54.L60_crit_edge, label %L59, !dbg !93
L54.L60_crit_edge: ; preds = %L54
br label %L60, !dbg !92
L59: ; preds = %L54
br label %L43, !dbg !90
L60: ; preds = %L38.L60_crit_edge, %L54.L60_crit_edge
%value_phi26 = phi i64 [ %93, %L54.L60_crit_edge ], [ %value_phi6, %L38.L60_crit_edge ]
%104 = icmp eq i64 %value_phi5, 5, !dbg !107
%105 = zext i1 %104 to i8, !dbg !107
%106 = trunc i8 %105 to i1, !dbg !108
%107 = xor i1 %106, true, !dbg !108
br i1 %107, label %L64, label %L63, !dbg !108
L63: ; preds = %L60
br label %L66, !dbg !108
L64: ; preds = %L60
%108 = add i64 %value_phi5, 1, !dbg !110
br label %L66, !dbg !108
L66: ; preds = %L64, %L63
%value_phi27 = phi i64 [ %108, %L64 ], [ undef, %L63 ]
%value_phi28 = phi i64 [ %108, %L64 ], [ undef, %L63 ]
%value_phi29 = phi i8 [ 1, %L63 ], [ 0, %L64 ]
%109 = trunc i8 %value_phi29 to i1, !dbg !109
%110 = xor i1 %109, true, !dbg !109
%111 = zext i1 %110 to i8, !dbg !109
%112 = trunc i8 %111 to i1, !dbg !109
%113 = xor i1 %112, true, !dbg !109
br i1 %113, label %L72, label %L71, !dbg !109
L71: ; preds = %L66
br label %L15, !dbg !109
L72: ; preds = %L66, %L14
ret {} addrspace(10)* %13, !dbg !112
after_throw: ; No predecessors!
call void @llvm.trap(), !dbg !56
unreachable, !dbg !56
after_noret: ; No predecessors!
call void @llvm.trap(), !dbg !56
unreachable, !dbg !56
after_throw13: ; No predecessors!
call void @llvm.trap(), !dbg !67
unreachable, !dbg !67
after_noret14: ; No predecessors!
call void @llvm.trap(), !dbg !67
unreachable, !dbg !67
guard_pass: ; preds = %L29
br label %guard_exit
guard_exit: ; preds = %guard_pass, %L29
%guard_res = phi i1 [ false, %L29 ], [ true, %guard_pass ]
br i1 %guard_res, label %guard_pass30, label %guard_exit31
guard_pass30: ; preds = %guard_exit
%unbox32 = load i64, i64 addrspace(11)* %12, align 8, !tbaa !33, !alias.scope !37, !noalias !38
br label %guard_exit31
guard_exit31: ; preds = %guard_pass30, %guard_exit
%guard_res33 = phi i64 [ undef, %guard_exit ], [ %unbox32, %guard_pass30 ]
br label %L31, !dbg !76
}
@vchuravy it looks like in get_function_name of codegen.cpp, the specsig flag, which should be set to true by gpucompiler is getting ignored or overridden or something
@gbaraldi @vtjnash if you have any insights here, especially on why 1.10-rc1 broke and caused this infinite recursion to begin with [whereas beta3 was fine]
@gbaraldi @vtjnash if you have any insights here, especially on why 1.10-rc1 broke and caused this infinite recursion to begin with [whereas beta3 was fine]
it looks like in get_function_name of codegen.cpp, the specsig flag, which should be set to true by gpucompiler, is getting ignored or overridden or something
Can you expand on this? Who is the caller? get_function_name is called from emit_function.
Is params.params->prefer_specsig true or false? At this location https://github.com/JuliaLang/julia/blob/dbb9c46795b0e45b770254542854526f3db119b5/src/codegen.cpp#L7273
So the culprit is that we are not correctly handling the invoke that was added in https://github.com/JuliaSparse/SparseArrays.jl/commit/c402d09cf05492179fad2def5632e354a81f5b30
# Forwarding method for numeric arguments: re-dispatches through `invoke` with the
# explicit signature `Tuple{typeof(rows), Vararg{Number}}`, splatting `n1` and `ns`
# back into the call.
hvcat(rows::Tuple{Vararg{Int}}, n1::Number, ns::Vararg{Number}) = invoke(hvcat, Tuple{typeof(rows), Vararg{Number}}, rows, n1, ns...)
# Same forwarding for the homogeneous case, where every element shares one type
# `N <: Number`; the `invoke` signature uses `Vararg{N}` instead.
hvcat(rows::Tuple{Vararg{Int}}, n1::N, ns::Vararg{N}) where {N<:Number} = invoke(hvcat, Tuple{typeof(rows), Vararg{N}}, rows, n1, ns...)
which got backported to 1.10-rc1 https://github.com/JuliaLang/julia/pull/48977