Could duplicated constants with the same value in the LLVM IR be merged?
When new thunks are generated, Enzyme fails to merge constants that have the same value in the extended module.
I would expect constants with the same value to be merged. If constants with different values are encountered, an error must be raised.
using Enzyme, Clang_jll, Libdl
# LLVM IR (typed-pointer syntax) for the native function under test.
# It defines the hidden constant `@.const.array.data.5` (48 bytes, read below as doubles)
# and `@func`, which stores one double through `%retptr` and always returns status 0.
# `%arg.arr.4` is the only array argument that is dereferenced; `%arg.arr.5.0` is used
# to wrap/clamp the computed index.
# NOTE(review): argument names suggest a Numba-generated array ABI — confirm.
const FUNC_LLVM_IR = raw"""
; ModuleID = '<stdin>'
source_filename = "<string>"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-conda-linux-gnu"
@.const.array.data.5 = hidden unnamed_addr constant [48 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0?\00\00\00\00\00\00\D0?\00\00\00\00\00\00\E8?\00\00\00\00\00\00\E8?\00\00\00\00\00\00\F0?", align 8
; Function Attrs: argmemonly nofree norecurse nosync nounwind
define i32 @func(double* noalias nocapture writeonly %retptr, { i8*, i32, i8*, i8*, i32 }** noalias nocapture readnone %excinfo, double %arg.t, i8* nocapture readnone %arg.arr.0, i8* nocapture readnone %arg.arr.1, i64 %arg.arr.2, i64 %arg.arr.3, double* nocapture readonly %arg.arr.4, i64 %arg.arr.5.0, i64 %arg.arr.6.0) local_unnamed_addr #0 {
B0.endif.endif:
%.449 = fcmp oeq double %arg.t, 1.000000e+00
%.241 = fcmp ult double %arg.t, 0.000000e+00
%.338 = fcmp uge double %arg.t, 2.500000e-01
%not.or.cond = or i1 %.241, %.338
%_ind.5.1 = sext i1 %not.or.cond to i64
%_ind.4.1 = select i1 %.449, i64 2, i64 %_ind.5.1
%.241.1 = fcmp oge double %arg.t, 2.500000e-01
%.338.1 = fcmp olt double %arg.t, 7.500000e-01
%or.cond6 = and i1 %.241.1, %.338.1
%_ind.5.1.1 = select i1 %or.cond6, i64 1, i64 %_ind.4.1
%_ind.4.1.1 = select i1 %.449, i64 2, i64 %_ind.5.1.1
%.466 = icmp eq i64 %_ind.4.1.1, -1
br i1 %.466, label %common.ret, label %B162
common.ret: ; preds = %B162, %B0.endif.endif
%.842.sink = phi double [ 0.000000e+00, %B0.endif.endif ], [ %.842, %B162 ]
store double %.842.sink, double* %retptr, align 8
ret i32 0
B162: ; preds = %B0.endif.endif
%0 = shl nuw nsw i64 %_ind.4.1.1, 1
%.560 = or i64 %0, 1
%.561 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %.560
%.562 = load double, double* %.561, align 8
%.650 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %0
%.651 = load double, double* %.650, align 8
%.657 = fsub double %.562, %.651
%.740 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %0
%.741 = load double, double* %.740, align 8
%.748 = fsub double %arg.t, %.741
%.752 = fmul double %.748, 1.000000e+02
%.756 = fdiv double %.752, %.657
%.757 = fptosi double %.756 to i64
%.762 = mul nuw nsw i64 %_ind.4.1.1, 100
%.765 = add nsw i64 %.762, %.757
%.771 = icmp eq i64 %.765, 300
%.826 = icmp slt i64 %.765, 0
%.827 = select i1 %.826, i64 %arg.arr.5.0, i64 0
%.828 = add i64 %.827, %.765
%.789 = add i64 %arg.arr.5.0, -1
%.828.sink = select i1 %.771, i64 %.789, i64 %.828
%.841 = getelementptr double, double* %arg.arr.4, i64 %.828.sink
%.842 = load double, double* %.841, align 8
br label %common.ret
}
attributes #0 = { argmemonly nofree norecurse nosync nounwind }
"""
# Build step: feed the IR text to clang on stdin (`-x ir -`) and emit a shared object at
# `sopath`. `-no-opaque-pointers` matches the typed-pointer syntax used in the IR above.
# NOTE(review): `-fembed-bitcode` presumably lets Enzyme recover the IR of `func` from the
# shared object — confirm.
sopath = "./func.so"
run(pipeline(
`$(clang()) -x ir - -Xclang -no-opaque-pointers -O3 -fPIC -fembed-bitcode -shared -o $(sopath)`;
stdin=IOBuffer(FUNC_LLVM_IR)
)
)
# load the function pointer
# `lib` is never closed in this script, so `fptr` stays valid for the rest of the session.
lib = Libdl.dlopen(sopath)
const fptr = Libdl.dlsym(lib, :func)
"""
    func_ccall(t::Float64, arr::AbstractVector{Float64})

Invoke the native `func` symbol through the module-level pointer `fptr` and return the
`Float64` that it writes into its result slot.

The length of `arr` and its element size are each passed twice, on either side of the data
pointer, and both untyped pointer arguments as well as the exception-info slot are null.
`arr` is kept rooted for the duration of the call.

# Throws
- `ErrorException` if the native function returns a non-zero status code.
"""
function func_ccall(t::Float64, arr::AbstractVector{Float64})
    len = length(arr)
    elbytes = Base.elsize(arr)
    GC.@preserve arr begin
        out = Ref{Float64}()
        # Null exception-info slot; the native signature marks this argument `readnone`.
        no_excinfo = Ptr{Ptr{Nothing}}()
        data = Base.unsafe_convert(Ptr{Cdouble}, arr)
        status = ccall(
            fptr,
            Cint,
            (
                Ref{Cdouble}, Ptr{Ptr{Cvoid}},
                Cdouble, Ptr{Cvoid}, Ptr{Cvoid},
                Clong, Clong, Ptr{Cdouble}, Clong, Clong,
            ),
            out, no_excinfo, t, C_NULL, C_NULL, len, elbytes, data, len, elbytes,
        )
        if status != 0
            error("returned non-zero status: $status")
        end
        out[]
    end
end
# .const.array.data.5 corresponds to the following
# tspans = [[0.0, 0.25], [0.25, 0.75], [0.75, 1.0]]
# Three segments of 100 grid points each; their product (300) is the length of `a`.
const GRID_SIZE = 100
const SEG_NUM = 3
const a = rand(SEG_NUM * GRID_SIZE)
# Reverse mode accumulates gradients into the shadow, so it has to start out zeroed;
# `similar(a)` would hand Enzyme uninitialised memory.
ad = zero(a)
# First thunk: differentiate with respect to `t` only.
autodiff(Reverse, func_ccall, Active, Active(1.0), Const(a))
# returns ((0.0, nothing),)
# Second thunk: differentiate with respect to the array; this is the call that fails.
autodiff(Reverse, func_ccall, Active, Const(1.0), Duplicated(a, ad))
# ERROR: LLVM error: Duplicate definition of symbol '.const.array.data.5'
# Stacktrace:
# [1] macro expansion
# @ ~/.julia/packages/LLVM/iza6e/src/executionengine/utils.jl:28 [inlined]
# [2] add!
# @ ~/.julia/packages/LLVM/iza6e/src/orc.jl:434 [inlined]
# [3] add!(mod::LLVM.Module)
# @ Enzyme.Compiler.JIT ~/projects/qruise/Enzyme.jl/src/compiler/orcv2.jl:264
# [4] _link(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, edges::Vector{…}, adjoint_name::String, primal_name::Union{…}, TapeType::Any, prepost::String)
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:5919
# [5] cached_compilation
# @ ~/projects/qruise/Enzyme.jl/src/compiler.jl:6012 [inlined]
# [6] thunkbase(mi::Core.MethodInstance, World::UInt64, FA::Type{…}, A::Type{…}, TT::Type, Mode::Enzyme.API.CDerivativeMode, width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, edges::Vector{…})
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6127
# [7] thunk_generator(world::UInt64, source::Union{…}, FA::Type, A::Type, TT::Type, Mode::Enzyme.API.CDerivativeMode, Width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, self::Any, fakeworld::Any, fa::Type, a::Type, tt::Type, mode::Type, width::Type, modifiedbetween::Type, returnprimal::Type, shadowinit::Type, abi::Type, erriffuncwritten::Type, runtimeactivity::Type, strongzero::Type)
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6271
# [8] autodiff
# @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:502 [inlined]
# [9] autodiff(::ReverseMode{…}, ::typeof(func_ccall), ::Type{…}, ::Const{…}, ::Duplicated{…})
# @ Enzyme ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:542
# [10] top-level scope
# @ REPL[14]:1
# Some type information was truncated. Use `show(err)` to see complete types.
Package information
julia> versioninfo()
Julia Version 1.11.6
Commit 9615af0f269 (2025-07-09 12:58 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 12 × AMD Ryzen 5 7640U w/ Radeon 760M Graphics
WORD_SIZE: 64
LLVM: libLLVM-16.0.6 (ORCJIT, znver4)
Threads: 1 default, 0 interactive, 1 GC (on 12 virtual cores)
julia> pkgversion(Enzyme)
v"0.13.93"
Would it be a plausible approach to link all the `LLVM.Module`s generated by `_thunk(job)` into a shared, common `LLVM.Context`, and then pass that reconstructed, canonicalised `LLVM.Module` to `_link` and update it iteratively?
https://github.com/EnzymeAD/Enzyme.jl/blob/461bed8ef54ad0b22b437560da74220721a55dbe/src/compiler.jl#L6007-L6029
That won't work: we compile one AD call at a time, and the code is no longer available in module form but is registered in global data. This should only occur for the direct ccall route, right? I suspect what needs to happen there is to privatize the globals, or something like that.
I spent a few days playing around and getting familiar with the source code. I noticed that for the add! function here
https://github.com/EnzymeAD/Enzyme.jl/blob/a7509fbd0994b2dfd4b9175b6777e34e3f6f1033/src/compiler/orcv2.jl#L249-L266
if there is an already existing pointer for a function, it will get replaced in the module by that pointer. But this is not the case for the global variables. I mean, global variables are not treated the same way. I also noticed that there is a dispatch for Compiler.eraseInst for global variables
https://github.com/EnzymeAD/Enzyme.jl/blob/a7509fbd0994b2dfd4b9175b6777e34e3f6f1033/src/compiler/utils.jl#L429-L435
Can I treat the global variables the same way the functions are treated, i.e. replace them with their respective pointers in the module?
I noticed this happens only when I use ccall. If I used llvmcall, this wouldn't be an issue. Here is an MWE to demonstrate this:
using Enzyme, Libdl
# LLVM IR (typed-pointer syntax) for the minimal example.
# `@A` is a private constant `[1.0, 2.0, 3.0]`; `@func(x, y, n)` returns `x * A[n] + y`
# with `n` used as a zero-based index and no bounds check in the IR itself.
const LLVM_IR = raw"""
; ModuleID = '<stdin>'
source_filename = "<string>"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-conda-linux-gnu"
@A = private unnamed_addr constant [3 x double]
[double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
define double @func(double %x, double %y, i64 %n) {
entry:
%ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
%aval = load double, double* %ptr, align 8
%prod = fmul double %x, %aval
%sum = fadd double %prod, %y
ret double %sum
}
""";
# Build step: pipe `LLVM_IR` into the `clang` found on PATH (`-x ir -` reads IR from
# stdin) and emit a shared object at `sopath`.
# NOTE(review): `-fembed-bitcode` presumably lets Enzyme recover the IR of `func` from the
# shared object — confirm.
sopath = "./func.so"
run(
pipeline(
`clang -x ir - -Xclang -no-opaque-pointers -O3 -fPIC -fembed-bitcode -shared -o $(sopath
)`; stdin=IOBuffer(LLVM_IR)
)
); # load the function pointer
# `lib` is never closed in this script, so `fptr` stays valid for the rest of the session.
lib = Libdl.dlopen(sopath);
const fptr = Libdl.dlsym(lib, :func);
"""
    func_llvm(x::Float64, y::Float64, n::Int)

Evaluate `func` from the module-level `LLVM_IR` string inline via `Base.llvmcall` and
return its `Cdouble` result.

`n` is validated first: unless `0 ≤ n ≤ 2`, the string `"0 ≤ n ≤ 2"` is thrown and the
IR is never reached.
"""
function func_llvm(x::Float64, y::Float64, n::Int)
    if !(0 <= n <= 2)
        throw("0 ≤ n ≤ 2")
    end
    return Base.llvmcall(
        (LLVM_IR, "func"),
        Cdouble,
        Tuple{Cdouble,Cdouble,Clong},
        x, y, n,
    )
end;
"""
    func_ccall(x::Float64, y::Float64, n::Int)

Call the native `func` symbol through the module-level pointer `fptr` and return its
`Cdouble` result.

`n` is validated first: if it lies outside `0:2`, the string `"0 ≤ n ≤ 2"` is thrown and
the foreign call is never made.
"""
function func_ccall(x::Float64, y::Float64, n::Int)
    (n < 0 || n > 2) && throw("0 ≤ n ≤ 2")
    return ccall(fptr, Cdouble, (Cdouble, Cdouble, Clong), x, y, n)
end;
# Fixed inputs for the minimal example.
const x = 2.0
const y = 1.0
const n = 2
# Julia-side copy of the `@A` constant in `LLVM_IR`; it is 1-based, hence `A[n+1]` below.
const A = [1.0, 2.0, 3.0]
# Both call routes must agree with each other and with the closed form x * A[n] + y.
@assert func_llvm(x, y, n) == func_ccall(x, y, n)
@assert func_llvm(x, y, n) == x * A[n+1] + y
@assert func_ccall(x, y, n) == x * A[n+1] + y
# Differentiate the llvmcall route with respect to `y` only.
gradient(Reverse, func_llvm, Const(x), y, Const(n))
# (nothing, 1.0, nothing)
# Differentiate the llvmcall route with respect to `x` only; this second thunk succeeds.
gradient(Reverse, func_llvm, x, Const(y), Const(n))
# (nothing, 1.0, nothing)
# NOTE(review): the recorded output above repeats the previous call's. With `x` active and
# d/dx (x * A[n+1] + y) = A[n+1] = 3.0, one would expect `(3.0, nothing, nothing)` — confirm.
##############################################################
##############################################################
##############################################################
##############################################################
# the constructed module here keeps the private linkage of A
##############################################################
##############################################################
##############################################################
##############################################################
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
#
# @A = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
#
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
#
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
#
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_llvm_7474wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
# %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
# %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
# %4 = bitcast i8* %tls_ppgcstack to {}****
# %tls_pgcstack = load {}***, {}**** %4, align 8
# %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
# %5 = bitcast {}*** %ptls_field3.i to i64***
# %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
# %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
# %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
# fence syncscope("singlethread") seq_cst
# %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
# fence syncscope("singlethread") seq_cst
# %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
# br i1 %spec.select.i, label %L12.i, label %diffejulia_func_llvm_7474.exit, !dbg !27
#
# L12.i: ; preds = %entry
# call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
# unreachable, !dbg !27
#
# diffejulia_func_llvm_7474.exit: ; preds = %entry
# %8 = call fastcc { double } @diffejulia_func_llvm_7474u7483(i64 %2, double %3) #10, !dbg !28
# %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
# ret [1 x { double }] %9
# }
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffejulia_func_llvm_7474u7483(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
# %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
# %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
# %0 = fmul fast double %aval, %differeturn
# %1 = insertvalue { double } zeroinitializer, double %0, 0
# ret { double } %1
# }
#
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
#
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
#
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
#
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_llvm_7474wrap", linkageName: "diffejulia_func_llvm_7474wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[7]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_llvm", linkageName: "julia_func_llvm_7474", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# Same two differentiations as above, now through the ccall route.
gradient(Reverse, func_ccall, Const(x), y, Const(n))
# (nothing, 1.0, nothing)
# NOTE(review): presumably this second thunk is the one that hits the duplicate-symbol
# error, since the modules dumped below define `@A` with non-private linkage — confirm.
gradient(Reverse, func_ccall, x, Const(y), Const(n))
##############################################################
##############################################################
##############################################################
##############################################################
# the constructed module here switches to external linkage for A
##############################################################
##############################################################
##############################################################
##############################################################
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
#
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
#
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
#
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
#
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8440wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
# %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
# %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
# %4 = bitcast i8* %tls_ppgcstack to {}****
# %tls_pgcstack = load {}***, {}**** %4, align 8
# %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
# %5 = bitcast {}*** %ptls_field3.i to i64***
# %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
# %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
# %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
# fence syncscope("singlethread") seq_cst
# %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
# fence syncscope("singlethread") seq_cst
# %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
# br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8440.exit, !dbg !27
#
# L11.i: ; preds = %entry
# call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
# unreachable, !dbg !27
#
# diffejulia_func_ccall_8440.exit: ; preds = %entry
# %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
# %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
# ret [1 x { double }] %9
# }
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
# %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
# %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
# %0 = fmul fast double %aval, %differeturn
# %1 = insertvalue { double } zeroinitializer, double %0, 0
# ret { double } %1
# }
#
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
#
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
#
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
#
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8440wrap", linkageName: "diffejulia_func_ccall_8440wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8440", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
#
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
#
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
#
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
#
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8454wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
# %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
# %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
# %4 = bitcast i8* %tls_ppgcstack to {}****
# %tls_pgcstack = load {}***, {}**** %4, align 8
# %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
# %5 = bitcast {}*** %ptls_field3.i to i64***
# %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
# %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
# %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
# fence syncscope("singlethread") seq_cst
# %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
# fence syncscope("singlethread") seq_cst
# %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
# br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8454.exit, !dbg !27
#
# L11.i: ; preds = %entry
# call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
# unreachable, !dbg !27
#
# diffejulia_func_ccall_8454.exit: ; preds = %entry
# %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
# %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
# ret [1 x { double }] %9
# }
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
# %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
# %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
# %0 = fmul fast double %aval, %differeturn
# %1 = insertvalue { double } zeroinitializer, double %0, 0
# ret { double } %1
# }
#
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
#
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
#
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
#
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8454wrap", linkageName: "diffejulia_func_ccall_8454wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8454", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
#
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
#
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
#
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
#
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8484wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
# %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
# %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
# %4 = bitcast i8* %tls_ppgcstack to {}****
# %tls_pgcstack = load {}***, {}**** %4, align 8
# %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
# %5 = bitcast {}*** %ptls_field3.i to i64***
# %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
# %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
# %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
# fence syncscope("singlethread") seq_cst
# %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
# fence syncscope("singlethread") seq_cst
# %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
# br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8484.exit, !dbg !27
#
# L11.i: ; preds = %entry
# call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
# unreachable, !dbg !27
#
# diffejulia_func_ccall_8484.exit: ; preds = %entry
# %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
# %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
# ret [1 x { double }] %9
# }
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
# %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
# %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
# %0 = fmul fast double %aval, %differeturn
# %1 = insertvalue { double } zeroinitializer, double %0, 0
# ret { double } %1
# }
#
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
#
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
#
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
#
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8484wrap", linkageName: "diffejulia_func_ccall_8484wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8484", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
#
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
#
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
#
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
#
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8490wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
# %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
# %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
# %4 = bitcast i8* %tls_ppgcstack to {}****
# %tls_pgcstack = load {}***, {}**** %4, align 8
# %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
# %5 = bitcast {}*** %ptls_field3.i to i64***
# %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
# %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
# %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
# fence syncscope("singlethread") seq_cst
# %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
# fence syncscope("singlethread") seq_cst
# %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
# br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8490.exit, !dbg !27
#
# L11.i: ; preds = %entry
# call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
# unreachable, !dbg !27
#
# diffejulia_func_ccall_8490.exit: ; preds = %entry
# %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
# %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
# ret [1 x { double }] %9
# }
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
#
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
# %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
# %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
# %0 = fmul fast double %aval, %differeturn
# %1 = insertvalue { double } zeroinitializer, double %0, 0
# ret { double } %1
# }
#
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
#
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
#
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
#
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
#
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8490wrap", linkageName: "diffejulia_func_ccall_8490wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8490", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ERROR: LLVM error: Duplicate definition of symbol 'A'
# Stacktrace:
# [1] macro expansion
# @ ~/.julia/packages/LLVM/iza6e/src/executionengine/utils.jl:28 [inlined]
# [2] add!
# @ ~/.julia/packages/LLVM/iza6e/src/orc.jl:434 [inlined]
# [3] add!(mod::LLVM.Module)
# @ Enzyme.Compiler.JIT ~/projects/qruise/Enzyme.jl/src/compiler/orcv2.jl:286
# [4] _link(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, edges::Vector{…}, adjoint_name::String, primal_name::Union{…}, TapeType::Any, prepost::String)
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:5923
# [5] cached_compilation
# @ ~/projects/qruise/Enzyme.jl/src/compiler.jl:6016 [inlined]
# [6] thunkbase(mi::Core.MethodInstance, World::UInt64, FA::Type{…}, A::Type{…}, TT::Type, Mode::Enzyme.API.CDerivativeMode, width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, edges::Vector{…})
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6131
# [7] thunk_generator(world::UInt64, source::Union{…}, FA::Type, A::Type, TT::Type, Mode::Enzyme.API.CDerivativeMode, Width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, self::Any, fakeworld::Any, fa::Type, a::Type, tt::Type, mode::Type, width::Type, modifiedbetween::Type, returnprimal::Type, shadowinit::Type, abi::Type, erriffuncwritten::Type, runtimeactivity::Type, strongzero::Type)
# @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6275
# [8] autodiff
# @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:502 [inlined]
# [9] autodiff
# @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:542 [inlined]
# [10] macro expansion
# @ ~/projects/qruise/Enzyme.jl/src/sugar.jl:286 [inlined]
# [11] gradient(::ReverseMode{…}, ::typeof(func_ccall), ::Float64, ::Const{…}, ::Const{…})
# @ Enzyme ~/projects/qruise/Enzyme.jl/src/sugar.jl:273
# [12] top-level scope
# @ REPL[17]:1