Enzyme.jl icon indicating copy to clipboard operation
Enzyme.jl copied to clipboard

Duplicated constants with the same value in the LLVM IR could be merged?

Open ymardoukhi opened this issue 1 month ago • 4 comments

When new thunks are generated, Enzyme fails to merge the constants with the same value in the extended module.

I would expect constants with the same value (defined under the same symbol name) to be merged. If constants with different values are encountered, an error should be raised.

using Enzyme, Clang_jll, Libdl

# LLVM IR module that is piped to clang below to build `func.so`.
# It defines `@func`, which stores a double through `%retptr` and returns i32 0,
# and the hidden constant `@.const.array.data.5` (48 bytes, read as doubles),
# which is the symbol named in the "Duplicate definition" error of this report.
const FUNC_LLVM_IR = raw"""
	; ModuleID = '<stdin>'
	source_filename = "<string>"
	target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-conda-linux-gnu"
	
	@.const.array.data.5 = hidden unnamed_addr constant [48 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0?\00\00\00\00\00\00\D0?\00\00\00\00\00\00\E8?\00\00\00\00\00\00\E8?\00\00\00\00\00\00\F0?", align 8
	
	; Function Attrs: argmemonly nofree norecurse nosync nounwind
	define i32 @func(double* noalias nocapture writeonly %retptr, { i8*, i32, i8*, i8*, i32 }** noalias nocapture readnone %excinfo, double %arg.t, i8* nocapture readnone %arg.arr.0, i8* nocapture readnone %arg.arr.1, i64 %arg.arr.2, i64 %arg.arr.3, double* nocapture readonly %arg.arr.4, i64 %arg.arr.5.0, i64 %arg.arr.6.0) local_unnamed_addr #0 {
	B0.endif.endif:
	  %.449 = fcmp oeq double %arg.t, 1.000000e+00
	  %.241 = fcmp ult double %arg.t, 0.000000e+00
	  %.338 = fcmp uge double %arg.t, 2.500000e-01
	  %not.or.cond = or i1 %.241, %.338
	  %_ind.5.1 = sext i1 %not.or.cond to i64
	  %_ind.4.1 = select i1 %.449, i64 2, i64 %_ind.5.1
	  %.241.1 = fcmp oge double %arg.t, 2.500000e-01
	  %.338.1 = fcmp olt double %arg.t, 7.500000e-01
	  %or.cond6 = and i1 %.241.1, %.338.1
	  %_ind.5.1.1 = select i1 %or.cond6, i64 1, i64 %_ind.4.1
	  %_ind.4.1.1 = select i1 %.449, i64 2, i64 %_ind.5.1.1
	  %.466 = icmp eq i64 %_ind.4.1.1, -1
	  br i1 %.466, label %common.ret, label %B162
	
	common.ret:                                       ; preds = %B162, %B0.endif.endif
	  %.842.sink = phi double [ 0.000000e+00, %B0.endif.endif ], [ %.842, %B162 ]
	  store double %.842.sink, double* %retptr, align 8
	  ret i32 0
	
	B162:                                             ; preds = %B0.endif.endif
	  %0 = shl nuw nsw i64 %_ind.4.1.1, 1
	  %.560 = or i64 %0, 1
	  %.561 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %.560
	  %.562 = load double, double* %.561, align 8
	  %.650 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %0
	  %.651 = load double, double* %.650, align 8
	  %.657 = fsub double %.562, %.651
	  %.740 = getelementptr double, double* bitcast ([48 x i8]* @.const.array.data.5 to double*), i64 %0
	  %.741 = load double, double* %.740, align 8
	  %.748 = fsub double %arg.t, %.741
	  %.752 = fmul double %.748, 1.000000e+02
	  %.756 = fdiv double %.752, %.657
	  %.757 = fptosi double %.756 to i64
	  %.762 = mul nuw nsw i64 %_ind.4.1.1, 100
	  %.765 = add nsw i64 %.762, %.757
	  %.771 = icmp eq i64 %.765, 300
	  %.826 = icmp slt i64 %.765, 0
	  %.827 = select i1 %.826, i64 %arg.arr.5.0, i64 0
	  %.828 = add i64 %.827, %.765
	  %.789 = add i64 %arg.arr.5.0, -1
	  %.828.sink = select i1 %.771, i64 %.789, i64 %.828
	  %.841 = getelementptr double, double* %arg.arr.4, i64 %.828.sink
	  %.842 = load double, double* %.841, align 8
	  br label %common.ret
	}
	
	attributes #0 = { argmemonly nofree norecurse nosync nounwind }
"""

# Build the shared library: clang reads the IR text from stdin (`-x ir -`)
# and writes the result to `sopath`.
sopath = "./func.so"
run(
    pipeline(
        `$(clang()) -x ir - -Xclang -no-opaque-pointers -O3 -fPIC -fembed-bitcode -shared -o $sopath`;
        stdin = IOBuffer(FUNC_LLVM_IR),
    ),
)

# Open the freshly built library and resolve the address of `func` for `ccall`.
lib = Libdl.dlopen(sopath)
const fptr = Libdl.dlsym(lib, :func)


"""
    func_ccall(t::Float64, arr::AbstractVector{Float64})

Call the native `func` through `fptr` with `t` and a raw pointer to `arr`, and
return the `Float64` that the callee writes through its first argument.

Raises an error if the callee returns a non-zero status code.
"""
function func_ccall(t::Float64, arr::AbstractVector{Float64})
    len = length(arr)
    itemsize = Base.elsize(arr)

    # `arr` is handed over as a raw pointer, so keep it rooted for the whole call.
    value = GC.@preserve arr begin
        errslot = Ptr{Ptr{Cvoid}}()  # null pointer; `%excinfo` is `readnone` in the IR
        out = Ref{Float64}()         # receives the value stored through `%retptr`

        # Arguments follow the parameter order of `@func` in the IR:
        # retptr, excinfo, t, two unused pointers, then
        # (length, element size, data pointer, length, element size).
        status = ccall(
            fptr,
            Cint,
            (Ref{Cdouble}, Ptr{Ptr{Cvoid}}, Cdouble, Ptr{Cvoid}, Ptr{Cvoid},
             Clong, Clong, Ptr{Cdouble}, Clong, Clong),
            out, errslot, t, C_NULL, C_NULL,
            len, itemsize, Base.unsafe_convert(Ptr{Cdouble}, arr), len, itemsize
        )

        if status != 0
            error("returned non-zero status: $status")
        end
        out[]
    end
    return value
end

# .const.array.data.5 corresponds to the following
# tspans = [[0.0, 0.25], [0.25, 0.75], [0.75, 1.0]]
# `a` holds SEG_NUM * GRID_SIZE = 300 random samples (300 is also the index
# bound that the IR compares against).
const GRID_SIZE = 100
const SEG_NUM = 3
const a = rand(SEG_NUM * GRID_SIZE)


# Reverse mode accumulates gradients into the shadow, so it has to start out
# zeroed; `similar(a)` would leave it holding uninitialized memory.
ad = zero(a)
# Differentiate w.r.t. `t` only.
autodiff(Reverse, func_ccall, Active, Active(1.0), Const(a))
# returns ((0.0, nothing),)
# Differentiate w.r.t. the array; this second thunk triggers the error below.
autodiff(Reverse, func_ccall, Active, Const(1.0), Duplicated(a, ad))
# ERROR: LLVM error: Duplicate definition of symbol '.const.array.data.5'
# Stacktrace:
#   [1] macro expansion
#     @ ~/.julia/packages/LLVM/iza6e/src/executionengine/utils.jl:28 [inlined]
#   [2] add!
#     @ ~/.julia/packages/LLVM/iza6e/src/orc.jl:434 [inlined]
#   [3] add!(mod::LLVM.Module)
#     @ Enzyme.Compiler.JIT ~/projects/qruise/Enzyme.jl/src/compiler/orcv2.jl:264
#   [4] _link(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, edges::Vector{…}, adjoint_name::String, primal_name::Union{…}, TapeType::Any, prepost::String)
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:5919
#   [5] cached_compilation
#     @ ~/projects/qruise/Enzyme.jl/src/compiler.jl:6012 [inlined]
#   [6] thunkbase(mi::Core.MethodInstance, World::UInt64, FA::Type{…}, A::Type{…}, TT::Type, Mode::Enzyme.API.CDerivativeMode, width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, edges::Vector{…})
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6127
#   [7] thunk_generator(world::UInt64, source::Union{…}, FA::Type, A::Type, TT::Type, Mode::Enzyme.API.CDerivativeMode, Width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, self::Any, fakeworld::Any, fa::Type, a::Type, tt::Type, mode::Type, width::Type, modifiedbetween::Type, returnprimal::Type, shadowinit::Type, abi::Type, erriffuncwritten::Type, runtimeactivity::Type, strongzero::Type)
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6271
#   [8] autodiff
#     @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:502 [inlined]
#   [9] autodiff(::ReverseMode{…}, ::typeof(func_ccall), ::Type{…}, ::Const{…}, ::Duplicated{…})
#     @ Enzyme ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:542
#  [10] top-level scope
#     @ REPL[14]:1
# Some type information was truncated. Use `show(err)` to see complete types.

Package information

julia> versioninfo()
Julia Version 1.11.6
Commit 9615af0f269 (2025-07-09 12:58 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 12 × AMD Ryzen 5 7640U w/ Radeon 760M Graphics
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, znver4)
Threads: 1 default, 0 interactive, 1 GC (on 12 virtual cores)

julia> pkgversion(Enzyme)
v"0.13.93"

ymardoukhi avatar Oct 28 '25 15:10 ymardoukhi

Is this a plausible path if I want to link all the LLVM.Module's generated by _thunk(job) into a shared common LLVM.Context, and then pass that reconstructed, canonicalised LLVM.Module to _link and iteratively update it?

https://github.com/EnzymeAD/Enzyme.jl/blob/461bed8ef54ad0b22b437560da74220721a55dbe/src/compiler.jl#L6007-L6029

ymardoukhi avatar Oct 30 '25 14:10 ymardoukhi

That won't work, we compile one ad call at a time, and the code is no longer available in module form, but registered in global data. This should only occur for the direct ccall route? I suspect what needs to happen there is to privatize the globals or something like that.

vchuravy avatar Oct 30 '25 14:10 vchuravy

I spent a few days playing around and getting familiar with the source code. I noticed that for the add! function here https://github.com/EnzymeAD/Enzyme.jl/blob/a7509fbd0994b2dfd4b9175b6777e34e3f6f1033/src/compiler/orcv2.jl#L249-L266 if there is an already existing pointer for a function, it will get replaced in the module by that pointer. But this is not the case for the global variables. I mean, global variables are not treated the same way. I also noticed that there is a dispatch for Compiler.eraseInst for global variables https://github.com/EnzymeAD/Enzyme.jl/blob/a7509fbd0994b2dfd4b9175b6777e34e3f6f1033/src/compiler/utils.jl#L429-L435 Can I treat the global variables the same way the functions are treated, i.e. replace them with their respective pointers in the module?

ymardoukhi avatar Nov 03 '25 18:11 ymardoukhi

I noticed this happens only when I use ccall; with llvmcall it is not an issue. Here is an MWE to demonstrate this:

using Enzyme, Libdl
# LLVM IR module used by both call paths of this MWE: it is compiled to
# `func.so` with clang below and is also passed directly to `Base.llvmcall`.
# `@func(x, y, n)` loads element `n` of the private constant `@A` and returns
# `x * A[n] + y`.
const LLVM_IR = raw"""
	; ModuleID = '<stdin>'
	source_filename = "<string>"
	target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-conda-linux-gnu"


        @A = private unnamed_addr constant [3 x double]
             [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
        
        define double @func(double %x, double %y, i64 %n) {
        entry:
          %ptr  = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
          %aval = load double, double* %ptr, align 8
          %prod = fmul double %x, %aval
          %sum  = fadd double %prod, %y
          ret double %sum
        }
""";

# Compile the IR (fed on stdin via `-x ir -`) into a shared library using the
# `clang` executable found on PATH.
sopath = "./func.so"

run(pipeline(
    `clang -x ir - -Xclang -no-opaque-pointers -O3 -fPIC -fembed-bitcode -shared -o $sopath`;
    stdin = IOBuffer(LLVM_IR),
));

# Load the library and look up the address of `func`.
lib = Libdl.dlopen(sopath);
const fptr = Libdl.dlsym(lib, :func);


"""
    func_llvm(x::Float64, y::Float64, n::Int)

Evaluate `func` from `LLVM_IR` through `Base.llvmcall`.
Throws unless `0 ≤ n ≤ 2`.
"""
function func_llvm(x::Float64, y::Float64, n::Int)
    # Reject indices outside the three-element constant before entering the IR.
    if !(0 <= n <= 2)
        throw("0 ≤ n ≤ 2")
    end
    return Base.llvmcall(
        (LLVM_IR, "func"), Cdouble, Tuple{Cdouble,Cdouble,Clong}, x, y, n
    )
end;


"""
    func_ccall(x::Float64, y::Float64, n::Int)

Evaluate the compiled `func` through the function pointer `fptr` with `ccall`.
Throws unless `0 ≤ n ≤ 2`.
"""
function func_ccall(x::Float64, y::Float64, n::Int)
    # Reject indices outside the three-element constant before the native call.
    if !(0 <= n <= 2)
        throw("0 ≤ n ≤ 2")
    end
    return ccall(fptr, Cdouble, (Cdouble, Cdouble, Clong), x, y, n)
end;

# Fixed evaluation point for the MWE.
const x, y, n = 2.0, 1.0, 2
# Julia-side copy of the values stored in `@A` in the IR.
const A = [1.0, 2.0, 3.0]

# Both call paths must agree with each other and with the closed form.
# The IR indexes `@A` from 0, hence `A[n + 1]` on the Julia side.
@assert func_llvm(x, y, n) == func_ccall(x, y, n)
@assert func_llvm(x, y, n) == x * A[n + 1] + y
@assert func_ccall(x, y, n) == x * A[n + 1] + y


# Differentiate w.r.t. `y` only (`x` and `n` are marked `Const`).
gradient(Reverse, func_llvm, Const(x), y, Const(n))
# (nothing, 1.0, nothing)
# Differentiate w.r.t. `x` only; func = x * A[n+1] + y, so d/dx = A[n+1] = 3.0.
gradient(Reverse, func_llvm, x, Const(y), Const(n))
# (3.0, nothing, nothing)
# NOTE(review): the post showed `(nothing, 1.0, nothing)` here, which looks like a copy-paste slip — confirm
##############################################################
##############################################################
##############################################################
##############################################################
# the constructed module here keeps the private linkage of A
##############################################################
##############################################################
##############################################################
##############################################################
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
# 
# @A = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
# 
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
# 
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
# 
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_llvm_7474wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
#   %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
#   %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
#   %4 = bitcast i8* %tls_ppgcstack to {}****
#   %tls_pgcstack = load {}***, {}**** %4, align 8
#   %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
#   %5 = bitcast {}*** %ptls_field3.i to i64***
#   %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
#   %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
#   %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
#   fence syncscope("singlethread") seq_cst
#   %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
#   fence syncscope("singlethread") seq_cst
#   %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
#   br i1 %spec.select.i, label %L12.i, label %diffejulia_func_llvm_7474.exit, !dbg !27
# 
# L12.i:                                            ; preds = %entry
#   call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
#   unreachable, !dbg !27
# 
# diffejulia_func_llvm_7474.exit:                   ; preds = %entry
#   %8 = call fastcc { double } @diffejulia_func_llvm_7474u7483(i64 %2, double %3) #10, !dbg !28
#   %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
#   ret [1 x { double }] %9
# }
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffejulia_func_llvm_7474u7483(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
#   %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
#   %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
#   %0 = fmul fast double %aval, %differeturn
#   %1 = insertvalue { double } zeroinitializer, double %0, 0
#   ret { double } %1
# }
# 
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
# 
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
# 
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
# 
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_llvm_7474wrap", linkageName: "diffejulia_func_llvm_7474wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[7]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_llvm", linkageName: "julia_func_llvm_7474", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}


# Same two gradients as above, now through the `ccall` path.
gradient(Reverse, func_ccall, Const(x), y, Const(n))
# (nothing, 1.0, nothing)
# NOTE(review): presumably this second call is where the duplicate-symbol error for `@A` is hit — confirm
gradient(Reverse, func_ccall, x, Const(y), Const(n))
##############################################################
##############################################################
##############################################################
##############################################################
# the constructed module here switches to external linkage for A
##############################################################
##############################################################
##############################################################
##############################################################
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
# 
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
# 
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
# 
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
# 
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8440wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
#   %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
#   %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
#   %4 = bitcast i8* %tls_ppgcstack to {}****
#   %tls_pgcstack = load {}***, {}**** %4, align 8
#   %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
#   %5 = bitcast {}*** %ptls_field3.i to i64***
#   %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
#   %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
#   %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
#   fence syncscope("singlethread") seq_cst
#   %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
#   fence syncscope("singlethread") seq_cst
#   %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
#   br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8440.exit, !dbg !27
# 
# L11.i:                                            ; preds = %entry
#   call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
#   unreachable, !dbg !27
# 
# diffejulia_func_ccall_8440.exit:                  ; preds = %entry
#   %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
#   %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
#   ret [1 x { double }] %9
# }
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
#   %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
#   %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
#   %0 = fmul fast double %aval, %differeturn
#   %1 = insertvalue { double } zeroinitializer, double %0, 0
#   ret { double } %1
# }
# 
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
# 
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
# 
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
# 
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8440wrap", linkageName: "diffejulia_func_ccall_8440wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8440", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
# 
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
# 
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
# 
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
# 
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8454wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
#   %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
#   %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
#   %4 = bitcast i8* %tls_ppgcstack to {}****
#   %tls_pgcstack = load {}***, {}**** %4, align 8
#   %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
#   %5 = bitcast {}*** %ptls_field3.i to i64***
#   %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
#   %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
#   %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
#   fence syncscope("singlethread") seq_cst
#   %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
#   fence syncscope("singlethread") seq_cst
#   %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
#   br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8454.exit, !dbg !27
# 
# L11.i:                                            ; preds = %entry
#   call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
#   unreachable, !dbg !27
# 
# diffejulia_func_ccall_8454.exit:                  ; preds = %entry
#   %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
#   %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
#   ret [1 x { double }] %9
# }
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
#   %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
#   %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
#   %0 = fmul fast double %aval, %differeturn
#   %1 = insertvalue { double } zeroinitializer, double %0, 0
#   ret { double } %1
# }
# 
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
# 
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
# 
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
# 
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8454wrap", linkageName: "diffejulia_func_ccall_8454wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8454", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
# 
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
# 
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
# 
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
# 
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8484wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
#   %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
#   %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
#   %4 = bitcast i8* %tls_ppgcstack to {}****
#   %tls_pgcstack = load {}***, {}**** %4, align 8
#   %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
#   %5 = bitcast {}*** %ptls_field3.i to i64***
#   %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
#   %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
#   %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
#   fence syncscope("singlethread") seq_cst
#   %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
#   fence syncscope("singlethread") seq_cst
#   %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
#   br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8484.exit, !dbg !27
# 
# L11.i:                                            ; preds = %entry
#   call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
#   unreachable, !dbg !27
# 
# diffejulia_func_ccall_8484.exit:                  ; preds = %entry
#   %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
#   %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
#   ret [1 x { double }] %9
# }
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
#   %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
#   %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
#   %0 = fmul fast double %aval, %differeturn
#   %1 = insertvalue { double } zeroinitializer, double %0, 0
#   ret { double } %1
# }
# 
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
# 
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
# 
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
# 
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8484wrap", linkageName: "diffejulia_func_ccall_8484wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8484", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}
# ; ModuleID = 'start'
# source_filename = "start"
# target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
# target triple = "x86_64-linux-gnu"
# 
# @A = dso_local unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 8
# 
# ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @julia.safepoint(i64*) local_unnamed_addr #0
# 
# ; Function Attrs: noreturn
# declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #1
# 
# ; Function Attrs: alwaysinline willreturn
# define [1 x { double }] @diffejulia_func_ccall_8490wrap(double %0, double %1, i64 %2, double %3) #2 !dbg !4 {
# entry:
#   %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #8
#   %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
#   %4 = bitcast i8* %tls_ppgcstack to {}****
#   %tls_pgcstack = load {}***, {}**** %4, align 8
#   %ptls_field3.i = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
#   %5 = bitcast {}*** %ptls_field3.i to i64***
#   %ptls_load45.i = load i64**, i64*** %5, align 8, !tbaa !8, !alias.scope !12, !noalias !15
#   %6 = getelementptr inbounds i64*, i64** %ptls_load45.i, i64 2
#   %safepoint.i = load i64*, i64** %6, align 8, !tbaa !17, !alias.scope !19, !noalias !22
#   fence syncscope("singlethread") seq_cst
#   %7 = load volatile i64, i64* %safepoint.i, align 8, !dbg !24
#   fence syncscope("singlethread") seq_cst
#   %spec.select.i = icmp ugt i64 %2, 2, !dbg !27
#   br i1 %spec.select.i, label %L11.i, label %diffejulia_func_ccall_8490.exit, !dbg !27
# 
# L11.i:                                            ; preds = %entry
#   call void @ijl_throw({} addrspace(12)* noundef addrspacecast ({}* inttoptr (i64 140058495569200 to {}*) to {} addrspace(12)*)) #9, !dbg !27
#   unreachable, !dbg !27
# 
# diffejulia_func_ccall_8490.exit:                  ; preds = %entry
#   %8 = call fastcc { double } @diffefunc(i64 %2, double %3) #10, !dbg !28
#   %9 = insertvalue [1 x { double }] zeroinitializer, { double } %8, 0
#   ret [1 x { double }] %9
# }
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
# declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
# 
# ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
# define private fastcc { double } @diffefunc(i64 %n, double %differeturn) unnamed_addr #4 {
# entry:
#   %ptr = getelementptr inbounds [3 x double], [3 x double]* @A, i64 0, i64 %n
#   %aval = load double, double* %ptr, align 8, !alias.scope !29, !noalias !32
#   %0 = fmul fast double %aval, %differeturn
#   %1 = insertvalue { double } zeroinitializer, double %0, 0
#   ret { double } %1
# }
# 
# ; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
# declare void @ijl_gc_queue_root({} addrspace(10)*) #5
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, i32, i32, i64) #6
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, i64, i64) #7
# 
# ; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
# declare noalias nonnull {} addrspace(10)* @ijl_gc_alloc_typed(i8*, i64, i64) #7
# 
# attributes #0 = { nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "enzyme_ReadOnlyOrThrow" "enzyme_inactive" "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #1 = { noreturn "enzyme_no_escaping_allocation" "enzymejl_world"="26736" }
# attributes #2 = { alwaysinline willreturn "enzymejl_world"="26736" "frame-pointer"="all" }
# attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
# attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "enzyme_parmremove"="0,1" "frame-pointer"="all" }
# attributes #5 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
# attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #7 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
# attributes #8 = { nounwind }
# attributes #9 = { mustprogress noreturn willreturn }
# attributes #10 = { nounwind willreturn }
# 
# !llvm.module.flags = !{!0, !1}
# !llvm.dbg.cu = !{!2}
# 
# !0 = !{i32 2, !"Dwarf Version", i32 4}
# !1 = !{i32 2, !"Debug Info Version", i32 3}
# !2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
# !3 = !DIFile(filename: "julia", directory: ".")
# !4 = distinct !DISubprogram(name: "diffejulia_func_ccall_8490wrap", linkageName: "diffejulia_func_ccall_8490wrap", scope: null, file: !5, type: !6, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !5 = !DIFile(filename: "REPL[8]", directory: ".")
# !6 = !DISubroutineType(types: !7)
# !7 = !{}
# !8 = !{!9, !9, i64 0}
# !9 = !{!"jtbaa_gcframe", !10, i64 0}
# !10 = !{!"jtbaa", !11, i64 0}
# !11 = !{!"jtbaa"}
# !12 = !{!13}
# !13 = distinct !{!13, !14, !"primal"}
# !14 = distinct !{!14, !" diff: %pgcstack"}
# !15 = !{!16}
# !16 = distinct !{!16, !14, !"shadow_0"}
# !17 = !{!18, !18, i64 0, i64 0}
# !18 = !{!"jtbaa_const", !10, i64 0}
# !19 = !{!20}
# !20 = distinct !{!20, !21, !"primal"}
# !21 = distinct !{!21, !" diff: %ptls_load45"}
# !22 = !{!23}
# !23 = distinct !{!23, !21, !"shadow_0"}
# !24 = !DILocation(line: 1, scope: !25, inlinedAt: !26)
# !25 = distinct !DISubprogram(name: "func_ccall", linkageName: "julia_func_ccall_8490", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7)
# !26 = distinct !DILocation(line: 0, scope: !4)
# !27 = !DILocation(line: 2, scope: !25, inlinedAt: !26)
# !28 = !DILocation(line: 3, scope: !25, inlinedAt: !26)
# !29 = !{!30}
# !30 = distinct !{!30, !31, !"primal"}
# !31 = distinct !{!31, !" diff: %A"}
# !32 = !{!33}
# !33 = distinct !{!33, !31, !"shadow_0"}

# ERROR: LLVM error: Duplicate definition of symbol 'A'
# Stacktrace:
#   [1] macro expansion
#     @ ~/.julia/packages/LLVM/iza6e/src/executionengine/utils.jl:28 [inlined]
#   [2] add!
#     @ ~/.julia/packages/LLVM/iza6e/src/orc.jl:434 [inlined]
#   [3] add!(mod::LLVM.Module)
#     @ Enzyme.Compiler.JIT ~/projects/qruise/Enzyme.jl/src/compiler/orcv2.jl:286
#   [4] _link(job::GPUCompiler.CompilerJob{…}, mod::LLVM.Module, edges::Vector{…}, adjoint_name::String, primal_name::Union{…}, TapeType::Any, prepost::String)
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:5923
#   [5] cached_compilation
#     @ ~/projects/qruise/Enzyme.jl/src/compiler.jl:6016 [inlined]
#   [6] thunkbase(mi::Core.MethodInstance, World::UInt64, FA::Type{…}, A::Type{…}, TT::Type, Mode::Enzyme.API.CDerivativeMode, width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, edges::Vector{…})
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6131
#   [7] thunk_generator(world::UInt64, source::Union{…}, FA::Type, A::Type, TT::Type, Mode::Enzyme.API.CDerivativeMode, Width::Int64, ModifiedBetween::NTuple{…} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, self::Any, fakeworld::Any, fa::Type, a::Type, tt::Type, mode::Type, width::Type, modifiedbetween::Type, returnprimal::Type, shadowinit::Type, abi::Type, erriffuncwritten::Type, runtimeactivity::Type, strongzero::Type)
#     @ Enzyme.Compiler ~/projects/qruise/Enzyme.jl/src/compiler.jl:6275
#   [8] autodiff
#     @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:502 [inlined]
#   [9] autodiff
#     @ ~/projects/qruise/Enzyme.jl/src/Enzyme.jl:542 [inlined]
#  [10] macro expansion
#     @ ~/projects/qruise/Enzyme.jl/src/sugar.jl:286 [inlined]
#  [11] gradient(::ReverseMode{…}, ::typeof(func_ccall), ::Float64, ::Const{…}, ::Const{…})
#     @ Enzyme ~/projects/qruise/Enzyme.jl/src/sugar.jl:273
#  [12] top-level scope
#     @ REPL[17]:1

ymardoukhi avatar Nov 04 '25 21:11 ymardoukhi