Enzyme
Enzyme copied to clipboard
Incorrect value error
https://fwd.gymni.ch/SH5Lj4
Adding `optnone` to the derivative function's attributes (attribute group #20 below) prevents the issue; without `optnone` the incorrect value is produced.
function grad(K, dK, acc, dacc, N)
Base.llvmcall(("""
; ModuleID = '/tmp/compiler-explorer-compiler202358-73-l9tv0f.2f1mg/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"
@.str = private unnamed_addr constant [5 x i8] c"f%d\\0A\\00", align 1
; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0
declare void @__enzyme_autodiff(...) local_unnamed_addr
define void @dsquare(double %arg, {} addrspace(10)* %a, {} addrspace(10)* %da, {} addrspace(10)* %b, {} addrspace(10)* %db) local_unnamed_addr {
bb:
call void @diffesq({} addrspace(10)* %a, {} addrspace(10)* %da, {} addrspace(10)* %b, {} addrspace(10)* %db, i64 16)
ret void
}
; Function Attrs: noinline
define void @sq({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg, {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg1, i64 %loopsize) #1 !dbg !6 {
entry:
%i = call {}*** @julia.get_pgcstack()
%i3 = bitcast {} addrspace(10)* %arg1 to double addrspace(13)* addrspace(10)*
%i4 = addrspacecast double addrspace(13)* addrspace(10)* %i3 to double addrspace(13)* addrspace(11)*
%i5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !14, !noalias !19, !nonnull !8
%i6 = bitcast {} addrspace(10)* %arg to double addrspace(13)* addrspace(10)*
%i7 = addrspacecast double addrspace(13)* addrspace(10)* %i6 to double addrspace(13)* addrspace(11)*
%i8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i7, align 16, !tbaa !24, !invariant.load !8, !alias.scope !26, !noalias !27, !nonnull !8
%i12 = getelementptr inbounds double, double addrspace(13)* %i5, i64 1, !dbg !28
%i13 = load double, double addrspace(13)* %i12, align 8, !dbg !28, !tbaa !38, !alias.scope !41, !noalias !42
%i14 = getelementptr inbounds double, double addrspace(13)* %i8, i64 1, !dbg !43
%i15 = load double, double addrspace(13)* %i14, align 8, !dbg !43, !tbaa !38, !alias.scope !41, !noalias !42
%i16 = fadd double %i13, %i15, !dbg !44
store double %i16, double addrspace(13)* %i12, align 8, !dbg !47, !tbaa !38, !alias.scope !41, !noalias !50
%i87 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !14, !noalias !19, !nonnull !8
br label %L51.i, !dbg !51
L51.i: ; preds = %L51.i, %entry
%value_phi3.i9 = phi i64 [ 0, %entry ], [ %i122, %L51.i ]
%a2 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %value_phi3.i9)
%i95 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %value_phi3.i9, !dbg !53
%i96 = load double, double addrspace(13)* %i95, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
%i97 = add nuw i64 %value_phi3.i9, 16, !dbg !56
%i98 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i97, !dbg !56
%i99 = load double, double addrspace(13)* %i98, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
%i100 = fadd double %i96, %i99, !dbg !57
store double %i100, double addrspace(13)* %i95, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
%i101 = or i64 %value_phi3.i9, 1, !dbg !59
%i102 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i101, !dbg !53
%i103 = load double, double addrspace(13)* %i102, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
%i104 = add nuw i64 %i101, 16, !dbg !56
%i105 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i104, !dbg !56
%i106 = load double, double addrspace(13)* %i105, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
%i107 = fadd double %i103, %i106, !dbg !57
store double %i107, double addrspace(13)* %i102, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
%i108 = or i64 %value_phi3.i9, 2, !dbg !59
%i109 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i108, !dbg !53
%i110 = load double, double addrspace(13)* %i109, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
%i111 = add nuw i64 %i108, 16, !dbg !56
%i112 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i111, !dbg !56
%i113 = load double, double addrspace(13)* %i112, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
%i114 = fadd double %i110, %i113, !dbg !57
store double %i114, double addrspace(13)* %i109, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
%i115 = or i64 %value_phi3.i9, 3, !dbg !59
%i116 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i115, !dbg !53
%i117 = load double, double addrspace(13)* %i116, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
%i118 = add nuw i64 %i115, 16, !dbg !56
%i119 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i118, !dbg !56
%i120 = load double, double addrspace(13)* %i119, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
%i121 = fadd double %i117, %i120, !dbg !57
store double %i121, double addrspace(13)* %i116, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
%i122 = add nuw nsw i64 %value_phi3.i9, 4, !dbg !59
%niter.ncmp.3.not = icmp eq i64 %i122, %loopsize, !dbg !51
br i1 %niter.ncmp.3.not, label %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, label %L51.i, !dbg !51, !llvm.loop !63
L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L51.i
ret void, !dbg !64
}
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr
; Function Attrs: mustprogress noinline willreturn
define internal void @diffesq({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg, {} addrspace(10)* nocapture nofree align 16 %"arg'", {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg1, {} addrspace(10)* nocapture nofree align 16 %"arg1'", i64 %loopsize) #20 !dbg !65 {
entry:
%"iv'ac" = alloca i64, align 8
%loopLimit_cache = alloca i64, align 8
%"i16'de" = alloca double, align 8
store double 0.000000e+00, double* %"i16'de", align 8
%"i13'de" = alloca double, align 8
store double 0.000000e+00, double* %"i13'de", align 8
%"i15'de" = alloca double, align 8
store double 0.000000e+00, double* %"i15'de", align 8
%"i121'de" = alloca double, align 8
store double 0.000000e+00, double* %"i121'de", align 8
%"i117'de" = alloca double, align 8
store double 0.000000e+00, double* %"i117'de", align 8
%"i120'de" = alloca double, align 8
store double 0.000000e+00, double* %"i120'de", align 8
%"i114'de" = alloca double, align 8
store double 0.000000e+00, double* %"i114'de", align 8
%"i110'de" = alloca double, align 8
store double 0.000000e+00, double* %"i110'de", align 8
%"i113'de" = alloca double, align 8
store double 0.000000e+00, double* %"i113'de", align 8
%"i107'de" = alloca double, align 8
store double 0.000000e+00, double* %"i107'de", align 8
%"i103'de" = alloca double, align 8
store double 0.000000e+00, double* %"i103'de", align 8
%"i106'de" = alloca double, align 8
store double 0.000000e+00, double* %"i106'de", align 8
%"i100'de" = alloca double, align 8
store double 0.000000e+00, double* %"i100'de", align 8
%"i96'de" = alloca double, align 8
store double 0.000000e+00, double* %"i96'de", align 8
%"i99'de" = alloca double, align 8
store double 0.000000e+00, double* %"i99'de", align 8
%i = call {}*** @julia.get_pgcstack() #3
%"i3'ipc" = bitcast {} addrspace(10)* %"arg1'" to double addrspace(13)* addrspace(10)*
%i3 = bitcast {} addrspace(10)* %arg1 to double addrspace(13)* addrspace(10)*
%"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
%i4 = addrspacecast double addrspace(13)* addrspace(10)* %i3 to double addrspace(13)* addrspace(11)*
%"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16, !tbaa !9, !alias.scope !66, !noalias !71, !nonnull !8
%i5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !73, !noalias !74, !nonnull !8
%"i6'ipc" = bitcast {} addrspace(10)* %"arg'" to double addrspace(13)* addrspace(10)*
%i6 = bitcast {} addrspace(10)* %arg to double addrspace(13)* addrspace(10)*
%"i7'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
%i7 = addrspacecast double addrspace(13)* addrspace(10)* %i6 to double addrspace(13)* addrspace(11)*
%"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16, !tbaa !24, !alias.scope !75, !noalias !78, !nonnull !8
%i8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i7, align 16, !tbaa !24, !invariant.load !8, !alias.scope !80, !noalias !81, !nonnull !8
%"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !82
%i12 = getelementptr inbounds double, double addrspace(13)* %i5, i64 1, !dbg !82
%i13 = load double, double addrspace(13)* %i12, align 8, !dbg !82, !tbaa !38, !alias.scope !87, !noalias !90
%"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !92
%i14 = getelementptr inbounds double, double addrspace(13)* %i8, i64 1, !dbg !92
%i15 = load double, double addrspace(13)* %i14, align 8, !dbg !92, !tbaa !38, !alias.scope !93, !noalias !96
%i16 = fadd double %i13, %i15, !dbg !98
store double %i16, double addrspace(13)* %i12, align 8, !dbg !99, !tbaa !38, !alias.scope !87, !noalias !100
%"i87'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16, !tbaa !9, !alias.scope !66, !noalias !71, !nonnull !8
%i87 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !73, !noalias !74, !nonnull !8
br label %L51.i, !dbg !101
L51.i: ; preds = %L51.i, %entry
%iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
%iv.next = add nuw nsw i64 %iv, 1
%0 = shl i64 %iv, 2
%a2 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %0) #3
%"i95'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %0, !dbg !103
%i95 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %0, !dbg !103
%i96 = load double, double addrspace(13)* %i95, align 8, !dbg !103, !tbaa !38
%i97 = add nuw i64 %0, 16, !dbg !111
%"i98'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97, !dbg !111
%i98 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i97, !dbg !111
%i99 = load double, double addrspace(13)* %i98, align 8, !dbg !111, !tbaa !38
%i100 = fadd double %i96, %i99, !dbg !112
store double %i100, double addrspace(13)* %i95, align 8, !dbg !113, !tbaa !38
%i101 = or i64 %0, 1, !dbg !115
%"i102'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i101, !dbg !103
%i102 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i101, !dbg !103
%i103 = load double, double addrspace(13)* %i102, align 8, !dbg !103, !tbaa !38
%i104 = add nuw i64 %i101, 16, !dbg !111
%"i105'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104, !dbg !111
%i105 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i104, !dbg !111
%i106 = load double, double addrspace(13)* %i105, align 8, !dbg !111, !tbaa !38
%i107 = fadd double %i103, %i106, !dbg !112
store double %i107, double addrspace(13)* %i102, align 8, !dbg !113, !tbaa !38
%i108 = or i64 %0, 2, !dbg !115
%"i109'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i108, !dbg !103
%i109 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i108, !dbg !103
%i110 = load double, double addrspace(13)* %i109, align 8, !dbg !103, !tbaa !38
%i111 = add nuw i64 %i108, 16, !dbg !111
%"i112'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111, !dbg !111
%i112 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i111, !dbg !111
%i113 = load double, double addrspace(13)* %i112, align 8, !dbg !111, !tbaa !38
%i114 = fadd double %i110, %i113, !dbg !112
store double %i114, double addrspace(13)* %i109, align 8, !dbg !113, !tbaa !38
%i115 = or i64 %0, 3, !dbg !115
%"i116'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i115, !dbg !103
%i116 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i115, !dbg !103
%i117 = load double, double addrspace(13)* %i116, align 8, !dbg !103, !tbaa !38
%i118 = add nuw i64 %i115, 16, !dbg !111
%"i119'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118, !dbg !111
%i119 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i118, !dbg !111
%i120 = load double, double addrspace(13)* %i119, align 8, !dbg !111, !tbaa !38
%i121 = fadd double %i117, %i120, !dbg !112
store double %i121, double addrspace(13)* %i116, align 8, !dbg !113, !tbaa !38
%i122 = add nuw nsw i64 %0, 4, !dbg !115
%niter.ncmp.3.not = icmp eq i64 %i122, %loopsize, !dbg !101
br i1 %niter.ncmp.3.not, label %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, label %L51.i, !dbg !101, !llvm.loop !117
L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L51.i
%1 = phi i64 [ %iv, %L51.i ], !dbg !118
store i64 %1, i64* %loopLimit_cache, align 8, !dbg !118, !invariant.group !119
br label %invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, !dbg !118
invertentry: ; preds = %invertL51.i
%2 = load double, double addrspace(13)* %"i12'ipg", align 8, !tbaa !38, !alias.scope !120, !noalias !121
store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !99, !tbaa !38, !alias.scope !120, !noalias !121
%3 = load double, double* %"i16'de", align 8
%4 = fadd fast double %3, %2
store double %4, double* %"i16'de", align 8
%5 = load double, double* %"i16'de", align 8
store double 0.000000e+00, double* %"i16'de", align 8
%6 = load double, double* %"i13'de", align 8
%7 = fadd fast double %6, %5
store double %7, double* %"i13'de", align 8
%8 = load double, double* %"i15'de", align 8
%9 = fadd fast double %8, %5
store double %9, double* %"i15'de", align 8
%10 = load double, double* %"i15'de", align 8
store double 0.000000e+00, double* %"i15'de", align 8
%11 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !92, !tbaa !38
%12 = fadd fast double %11, %10
store double %12, double addrspace(13)* %"i14'ipg", align 8, !dbg !92, !tbaa !38
%13 = load double, double* %"i13'de", align 8
store double 0.000000e+00, double* %"i13'de", align 8
%14 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !82, !tbaa !38
%15 = fadd fast double %14, %13
store double %15, double addrspace(13)* %"i12'ipg", align 8, !dbg !82, !tbaa !38
ret void
invertL51.i: ; preds = %mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, %incinvertL51.i
%16 = load i64, i64* %"iv'ac", align 8
%_unwrap = shl i64 %16, 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i115_unwrap
%17 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !tbaa !38
store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !113, !tbaa !38
%18 = load double, double* %"i121'de", align 8
%19 = fadd fast double %18, %17
store double %19, double* %"i121'de", align 8
%20 = load double, double* %"i121'de", align 8
store double 0.000000e+00, double* %"i121'de", align 8
%21 = load double, double* %"i117'de", align 8
%22 = fadd fast double %21, %20
store double %22, double* %"i117'de", align 8
%23 = load double, double* %"i120'de", align 8
%24 = fadd fast double %23, %20
store double %24, double* %"i120'de", align 8
%25 = load double, double* %"i120'de", align 8
store double 0.000000e+00, double* %"i120'de", align 8
%26 = load i64, i64* %"iv'ac", align 8
%i118_unwrap = add nuw i64 %i115_unwrap, 16
%"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
%27 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%28 = fadd fast double %27, %25
store double %28, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%29 = load double, double* %"i117'de", align 8
store double 0.000000e+00, double* %"i117'de", align 8
%30 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%31 = fadd fast double %30, %29
store double %31, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%32 = load i64, i64* %"iv'ac", align 8
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i108_unwrap
%33 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !tbaa !38
store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !113, !tbaa !38
%34 = load double, double* %"i114'de", align 8
%35 = fadd fast double %34, %33
store double %35, double* %"i114'de", align 8
%36 = load double, double* %"i114'de", align 8
store double 0.000000e+00, double* %"i114'de", align 8
%37 = load double, double* %"i110'de", align 8
%38 = fadd fast double %37, %36
store double %38, double* %"i110'de", align 8
%39 = load double, double* %"i113'de", align 8
%40 = fadd fast double %39, %36
store double %40, double* %"i113'de", align 8
%41 = load double, double* %"i113'de", align 8
store double 0.000000e+00, double* %"i113'de", align 8
%42 = load i64, i64* %"iv'ac", align 8
%i111_unwrap = add nuw i64 %i108_unwrap, 16
%"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
%43 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%44 = fadd fast double %43, %41
store double %44, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%45 = load double, double* %"i110'de", align 8
store double 0.000000e+00, double* %"i110'de", align 8
%46 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%47 = fadd fast double %46, %45
store double %47, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%48 = load i64, i64* %"iv'ac", align 8
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i101_unwrap
%49 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !tbaa !38
store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !113, !tbaa !38
%50 = load double, double* %"i107'de", align 8
%51 = fadd fast double %50, %49
store double %51, double* %"i107'de", align 8
%52 = load double, double* %"i107'de", align 8
store double 0.000000e+00, double* %"i107'de", align 8
%53 = load double, double* %"i103'de", align 8
%54 = fadd fast double %53, %52
store double %54, double* %"i103'de", align 8
%55 = load double, double* %"i106'de", align 8
%56 = fadd fast double %55, %52
store double %56, double* %"i106'de", align 8
%57 = load double, double* %"i106'de", align 8
store double 0.000000e+00, double* %"i106'de", align 8
%58 = load i64, i64* %"iv'ac", align 8
%i104_unwrap = add nuw i64 %i101_unwrap, 16
%"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
%59 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%60 = fadd fast double %59, %57
store double %60, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%61 = load double, double* %"i103'de", align 8
store double 0.000000e+00, double* %"i103'de", align 8
%62 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%63 = fadd fast double %62, %61
store double %63, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%64 = load i64, i64* %"iv'ac", align 8
%"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %_unwrap
%65 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8, !tbaa !38
store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !113, !tbaa !38
%66 = load double, double* %"i100'de", align 8
%67 = fadd fast double %66, %65
store double %67, double* %"i100'de", align 8
%68 = load double, double* %"i100'de", align 8
store double 0.000000e+00, double* %"i100'de", align 8
%69 = load double, double* %"i96'de", align 8
%70 = fadd fast double %69, %68
store double %70, double* %"i96'de", align 8
%71 = load double, double* %"i99'de", align 8
%72 = fadd fast double %71, %68
store double %72, double* %"i99'de", align 8
%73 = load double, double* %"i99'de", align 8
store double 0.000000e+00, double* %"i99'de", align 8
%74 = load i64, i64* %"iv'ac", align 8
%i97_unwrap = add nuw i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
%75 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%76 = fadd fast double %75, %73
store double %76, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !111, !tbaa !38
%77 = load double, double* %"i96'de", align 8
store double 0.000000e+00, double* %"i96'de", align 8
%78 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%79 = fadd fast double %78, %77
store double %79, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !103, !tbaa !38
%80 = load i64, i64* %"iv'ac", align 8
%81 = icmp eq i64 %80, 0
%82 = xor i1 %81, true
br i1 %81, label %invertentry, label %incinvertL51.i
incinvertL51.i: ; preds = %invertL51.i
%83 = load i64, i64* %"iv'ac", align 8
%84 = add nsw i64 %83, -1
store i64 %84, i64* %"iv'ac", align 8
br label %invertL51.i
invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa
%85 = load i64, i64* %loopLimit_cache, align 8, !invariant.group !119
br label %mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa
mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa
store i64 %85, i64* %"iv'ac", align 8
br label %invertL51.i
}
attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { noinline }
attributes #2 = { mustprogress noinline willreturn }
attributes #20 = { mustprogress noinline optnone willreturn }
attributes #3 = { mustprogress willreturn }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17}
!15 = distinct !{!15, !16, !"na_addr13"}
!16 = distinct !{!16, !"addr13"}
!17 = !{!"jnoalias_typemd", !18}
!18 = !{!"jnoalias"}
!19 = !{!20, !21, !22, !23}
!20 = !{!"jnoalias_gcframe", !18}
!21 = !{!"jnoalias_stack", !18}
!22 = !{!"jnoalias_data", !18}
!23 = !{!"jnoalias_const", !18}
!24 = !{!25, !25, i64 0, i64 1}
!25 = !{!"jtbaa_const", !12, i64 0}
!26 = !{!15, !23}
!27 = !{!20, !21, !22, !17}
!28 = !DILocation(line: 13, scope: !29, inlinedAt: !31)
!29 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !30, file: !30, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!30 = !DIFile(filename: "essentials.jl", directory: ".")
!31 = distinct !DILocation(line: 12, scope: !32, inlinedAt: !33)
!32 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!33 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !36)
!34 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !35, file: !35, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!35 = !DIFile(filename: "simdloop.jl", directory: ".")
!36 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !37)
!37 = distinct !DILocation(line: 0, scope: !6)
!38 = !{!39, !39, i64 0}
!39 = !{!"jtbaa_arraybuf", !40, i64 0}
!40 = !{!"jtbaa_data", !12, i64 0}
!41 = !{!22}
!42 = !{!20, !21, !17, !23}
!43 = !DILocation(line: 14, scope: !29, inlinedAt: !31)
!44 = !DILocation(line: 408, scope: !45, inlinedAt: !31)
!45 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !46, file: !46, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!46 = !DIFile(filename: "float.jl", directory: ".")
!47 = !DILocation(line: 969, scope: !48, inlinedAt: !31)
!48 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !49, file: !49, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!49 = !DIFile(filename: "array.jl", directory: ".")
!50 = !{!15, !20, !21, !17, !23}
!51 = !DILocation(line: 75, scope: !34, inlinedAt: !52)
!52 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !37)
!53 = !DILocation(line: 13, scope: !29, inlinedAt: !54)
!54 = distinct !DILocation(line: 15, scope: !32, inlinedAt: !55)
!55 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !52)
!56 = !DILocation(line: 14, scope: !29, inlinedAt: !54)
!57 = !DILocation(line: 408, scope: !45, inlinedAt: !54)
!58 = !DILocation(line: 969, scope: !48, inlinedAt: !54)
!59 = !DILocation(line: 87, scope: !60, inlinedAt: !62)
!60 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !61, file: !61, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!61 = !DIFile(filename: "int.jl", directory: ".")
!62 = distinct !DILocation(line: 78, scope: !34, inlinedAt: !52)
!63 = distinct !{!63}
!64 = !DILocation(line: 0, scope: !6)
!65 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!66 = !{!67, !69, !17}
!67 = distinct !{!67, !68, !"shadow_0"}
!68 = distinct !{!68, !" diff: %arg1"}
!69 = distinct !{!69, !70, !"na_addr13"}
!70 = distinct !{!70, !"addr13"}
!71 = !{!72, !20, !21, !22, !23}
!72 = distinct !{!72, !68, !"primal"}
!73 = !{!72, !69, !17}
!74 = !{!67, !20, !21, !22, !23}
!75 = !{!76, !69, !23}
!76 = distinct !{!76, !77, !"shadow_0"}
!77 = distinct !{!77, !" diff: %arg"}
!78 = !{!79, !20, !21, !22, !17}
!79 = distinct !{!79, !77, !"primal"}
!80 = !{!79, !69, !23}
!81 = !{!76, !20, !21, !22, !17}
!82 = !DILocation(line: 13, scope: !29, inlinedAt: !83)
!83 = distinct !DILocation(line: 12, scope: !32, inlinedAt: !84)
!84 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !85)
!85 = distinct !DILocation(line: 11, scope: !65, inlinedAt: !86)
!86 = distinct !DILocation(line: 0, scope: !65)
!87 = !{!88, !22}
!88 = distinct !{!88, !89, !"primal"}
!89 = distinct !{!89, !" diff: %i5"}
!90 = !{!91, !20, !21, !17, !23}
!91 = distinct !{!91, !89, !"shadow_0"}
!92 = !DILocation(line: 14, scope: !29, inlinedAt: !83)
!93 = !{!94, !22}
!94 = distinct !{!94, !95, !"primal"}
!95 = distinct !{!95, !" diff: %i8"}
!96 = !{!97, !20, !21, !17, !23}
!97 = distinct !{!97, !95, !"shadow_0"}
!98 = !DILocation(line: 408, scope: !45, inlinedAt: !83)
!99 = !DILocation(line: 969, scope: !48, inlinedAt: !83)
!100 = !{!91, !69, !20, !21, !17, !23}
!101 = !DILocation(line: 75, scope: !34, inlinedAt: !102)
!102 = distinct !DILocation(line: 14, scope: !65, inlinedAt: !86)
!103 = !DILocation(line: 13, scope: !29, inlinedAt: !104)
!104 = distinct !DILocation(line: 15, scope: !32, inlinedAt: !105)
!105 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !102)
!106 = !{!107, !22}
!107 = distinct !{!107, !108, !"primal"}
!108 = distinct !{!108, !" diff: %i87"}
!109 = !{!110, !20, !21, !17, !23}
!110 = distinct !{!110, !108, !"shadow_0"}
!111 = !DILocation(line: 14, scope: !29, inlinedAt: !104)
!112 = !DILocation(line: 408, scope: !45, inlinedAt: !104)
!113 = !DILocation(line: 969, scope: !48, inlinedAt: !104)
!114 = !{!110, !69, !20, !21, !17, !23}
!115 = !DILocation(line: 87, scope: !60, inlinedAt: !116)
!116 = distinct !DILocation(line: 78, scope: !34, inlinedAt: !102)
!117 = distinct !{!117}
!118 = !DILocation(line: 0, scope: !65)
!119 = distinct !{}
!120 = !{!91, !22}
!121 = !{!88, !69, !20, !21, !17, !23}
!122 = !{!97, !22}
!123 = !{!94, !20, !21, !17, !23}
!124 = !{!88, !20, !21, !17, !23}
!125 = !{!110, !22}
!126 = !{!107, !69, !20, !21, !17, !23}
!127 = !{!107, !20, !21, !17, !23}
""","dsquare"), Cvoid, Tuple{Any,Any,Any,Any,Int64},
K,dK, acc,dacc,N)
end
# Driver for the llvmcall reproducer: every primal and shadow buffer starts at
# zero, and a single seed of 1.0 is placed in the second entry of `dacc`.
N = 16
K, dK = zeros(N, N), zeros(N, N)
acc, dacc = zeros(N), zeros(N)
dacc[2] = 1.0
grad(K, dK, acc, dacc, N)
@show dK
Intended output (1's):
f0
f4
f8
f12
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
Bad output:
f0
f4
f8
f12
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
The bug persists after mem2reg, but running O1 makes it disappear (the input here is the post-mem2reg IR): https://godbolt.org/z/MMTefhjj1
The output of that Godbolt run (with instcombine and simplifycfg additionally applied) still produces the wrong result.
Original Julia code:
using LinearAlgebra
using Enzyme
using EnzymeTestUtils
# Turn on Enzyme's `printall!` flag before anything is differentiated.
# NOTE(review): presumably this makes Enzyme dump the IR it processes — confirm against the Enzyme.API docs.
Enzyme.API.printall!(true)
# Minimal reproducer kernel: adds column 1 and column 2 of `K` into `acc`, in place.
#
# Arguments:
#   K   - matrix indexed as K[i,1] and K[i,2]
#   acc - vector that is updated in place (despite the "non_mutating" in the name)
#   N   - trip count of the second loop only; the first loop is hard-coded to 1:16
#
# Returns `nothing`.
# Both loops run under `@inbounds`, so no bounds checks are performed on `K` or `acc`.
function local_sensitivity_non_mutating(K, acc, N)
# acc = summm(K)
# First loop: fixed trip count of 16, reads column 1.
@inbounds @simd for i in 1:16
acc[i] += K[i,1]
end
# Second loop: trip count taken from the argument N, reads column 2.
@inbounds @simd for i in 1:N
acc[i] += K[i,2]
end
return
end
# differentiate sum(coupled_springs(K, m, x0, v0, Ktmp, xtmp, vtmp; dt = 0.001, T = 1.0)) w.r.t. K
# NOTE(review): `coupled_springs` is not defined in this snippet; the line above looks like a
# leftover from the larger example this reproducer was reduced from — confirm.
# Global problem sizes and inputs. `N` is also read as a global inside `enzyme_gradient`.
N = 16
M = 16
# N-by-M matrix of ones used as the primal input.
K = ones(N, M)
# `m`, `x0` and `v0` are only forwarded to `enzyme_gradient`, which does not use them.
m = 0.5 .+ 0.5 * rand(N)
x0 = ones(M)
v0 = zeros(M)
# Runs Enzyme reverse-mode autodiff on `f(K, acc, N)` and prints the shadow of `K`.
#
# Arguments:
#   f            - function to differentiate; wrapped in `Const` and called with (K, acc, N)
#   K            - primal input array; its shadow `dK` is allocated here with `zero(K)`
#   m, x0, v0, T - accepted so existing call sites keep working, but not used in the body
#
# Reads the global `N` for the length of `acc`/`dacc` and as the `Const(N)` argument.
# Returns `nothing`; the gradient is only reported through `@show dK`.
function enzyme_gradient(f::F, K, m, x0, v0, T) where F
    dK = zero(K)
    # Fixed: this was `Mzeros(N)`, which is not a defined function; the other
    # reproducers in this report build `acc` with `zeros`.
    acc = zeros(N)
    dacc = zeros(N)
    # Seed the reverse pass: only the second entry of the accumulator shadow is non-zero.
    dacc[2] = 1.0
    Enzyme.autodiff(Reverse, Const(f), Const, Duplicated(K, dK), Duplicated(acc, dacc), Const(N))
    @show dK
    nothing
    # forward, pullback = Enzyme.autodiff_thunk(
    # ReverseSplitModified(ReverseSplitWithPrimal, Val((false, false, false, false, false, false))),
    # Const{F}, Active{Float64},
    # Duplicated{typeof(K)}, Duplicated{typeof(m)}, Duplicated{typeof(x0)}, Duplicated{typeof(v0)}, Const{typeof(T)})
    # tape, primal, shadow = forward(Const(f), Duplicated(K, dK), Duplicated(m, dm), Duplicated(x0, dx0), Duplicated(v0, dv0), Const(T))
    # dret = pullback(Const(f), Duplicated(K, dK), Duplicated(m, dm), Duplicated(x0, dx0), Duplicated(v0, dv0), Const(T), one(eltype(K)), tape)
    # return dK
end
# Run the reproducer. `enzyme_gradient` ends in `nothing`, so `dK1` is bound to `nothing`;
# the gradient itself is only visible in the `@show dK` output printed inside the call.
dK1 = enzyme_gradient(local_sensitivity_non_mutating, K, m, x0, v0, 0.001)
Not erroring:
# Hand-reduced reproducer: runs the embedded LLVM module through `Base.llvmcall`, entry point `dsquare`.
#
# Arguments:
#   K, acc - accepted but not referenced in the body
#   dK     - passed to the IR as a Julia object (`Any`)
#   dacc   - passed as a raw `Ptr{Float64}` via `pointer(dacc)`; `GC.preserve` keeps it (and `dK`) rooted
#   N      - passed as the third llvmcall argument
#
# Returns the `Int64` produced by `dsquare`.
# In this variant the `dacc` buffer is accessed through plain `double*` pointers inside the IR,
# while the buffer loaded from `dK` is typed `double addrspace(13)*`.
# NOTE(review): the IR's `dsquare` declares two parameters and hard-codes the loop size as `i64 4`,
# while three values (including `N`) are passed here — confirm how llvmcall handles the extra argument.
function grad(K, dK, acc, dacc, N)
@GC.preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"
; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0
declare void @__enzyme_autodiff(...) local_unnamed_addr
define i64 @dsquare({} addrspace(10)* %da, i64 %db) local_unnamed_addr {
bb:
%ddb = inttoptr i64 %db to double*
%r = call i64 @diffesq({} addrspace(10)* %da, double* %ddb, i64 4)
ret i64 %r
}
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq({} addrspace(10)* nocapture nofree align 16 %"arg'", double* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
%i = call {}*** @julia.get_pgcstack() #2
%"i6'ipc" = bitcast {} addrspace(10)* %"arg'" to double addrspace(13)* addrspace(10)*
%"i7'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
%"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16
%"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1, !dbg !38
%"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !56
br label %L51.i, !dbg !69
L51.i: ; preds = %L51.i, %entry
%iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
%iv.next = add i64 %iv, 1
%niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89
invertentry: ; preds = %invertL51.i
%i2 = load double, double* %"i12'ipg", align 8
store double 0.000000e+00, double* %"i12'ipg", align 8, !dbg !65
%i9 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
%i10 = fadd fast double %i9, %i2
store double %i10, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
%i11 = load double, double* %"i12'ipg", align 8, !dbg !38
%i17 = fadd fast double %i11, %i2
store double %i17, double* %"i12'ipg", align 8, !dbg !38
ret i64 %iv
invertL51.i: ; preds = %incinvertL51.i, %L51.i
%"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
%_unwrap = shl i64 %"iv'ac.0", 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
%i18 = load double, double* %"i116'ipg_unwrap", align 8
store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8, !dbg !81
%i118_unwrap = add i64 %_unwrap, 19
%"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
%i19 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
%i20 = fadd fast double %i19, %i18
store double %i20, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
%i21 = load double, double* %"i116'ipg_unwrap", align 8, !dbg !71
%i22 = fadd fast double %i21, %i18
store double %i22, double* %"i116'ipg_unwrap", align 8, !dbg !71
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
%i23 = load double, double* %"i109'ipg_unwrap", align 8
store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8, !dbg !81
%i111_unwrap = add i64 %_unwrap, 18
%"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
%i24 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
%i25 = fadd fast double %i24, %i23
store double %i25, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
%i26 = load double, double* %"i109'ipg_unwrap", align 8, !dbg !71
%i27 = fadd fast double %i26, %i23
store double %i27, double* %"i109'ipg_unwrap", align 8, !dbg !71
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
%i28 = load double, double* %"i102'ipg_unwrap", align 8
store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8, !dbg !81
%i104_unwrap = add i64 %_unwrap, 17
%"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
%i29 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
%i30 = fadd fast double %i29, %i28
store double %i30, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
%a19 = load double, double* %"i102'ipg_unwrap", align 8, !dbg !71
%a20 = fadd fast double %a19, %i28
store double %a20, double* %"i102'ipg_unwrap", align 8, !dbg !71
%"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
%a21 = load double, double* %"i95'ipg_unwrap", align 8
store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8, !dbg !81
%i97_unwrap = add i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
%a22 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
%a23 = fadd fast double %a22, %a21
store double %a23, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
%a26 = icmp eq i64 %"iv'ac.0", 0
%a27 = add i64 %"iv'ac.0", -1
br i1 %a26, label %invertentry, label %invertL51.i
}
attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Ptr{Float64},Int64},
dK, pointer(dacc),N)
end
# Driver for the `grad` defined directly above: all inputs start at zero.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# Seed: only the second entry of the accumulator shadow is non-zero.
dacc[2] = 1.0
N = 16
# Print the integer returned by `grad`, then the contents of `dK` after the call.
@show grad(K, dK, acc,dacc, N)
@show dK
Does not fail with address space 10:
# Reproducer variant that the surrounding report describes as NOT failing.
# Runs the embedded LLVM module through `Base.llvmcall`, entry point `dsquare`.
#
# Arguments:
#   K, acc   - accepted but not referenced in the body
#   dK, dacc - both passed to the IR as Julia objects (`Any`); kept rooted by `GC.@preserve`
#   N        - passed as the third llvmcall argument
#
# Returns the `Int64` produced by `dsquare`.
# Inside `dsquare` the buffer pointer loaded from `dK` is typed `double*` (default address space),
# while the one loaded from `dacc` is typed `double addrspace(13)*`.
# NOTE(review): the IR's `dsquare` declares two parameters and hard-codes the loop size as `i64 4`,
# while three values (including `N`) are passed here — confirm how llvmcall handles the extra argument.
function grad(K, dK, acc, dacc, N)
GC.@preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"
; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0
declare void @__enzyme_autodiff(...) local_unnamed_addr
define i64 @dsquare({} addrspace(10)* %da, {} addrspace(10)* %db) local_unnamed_addr {
bb:
%"i6'ipc" = bitcast {} addrspace(10)* %da to double* addrspace(10)*
%"i7'ipc" = addrspacecast double* addrspace(10)* %"i6'ipc" to double* addrspace(11)*
%"i8'ipl" = load double*, double* addrspace(11)* %"i7'ipc", align 16
%"i3'ipc" = bitcast {} addrspace(10)* %db to double addrspace(13)* addrspace(10)*
%"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
%"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16
%r = call i64 @diffesq(double* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 4)
ret i64 %r
}
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
%i = call {}*** @julia.get_pgcstack() #2
%"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !38
%"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1, !dbg !56
br label %L51.i, !dbg !69
L51.i: ; preds = %L51.i, %entry
%iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
%iv.next = add i64 %iv, 1
%niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89
invertentry: ; preds = %invertL51.i
%i2 = load double, double addrspace(13)* %"i12'ipg", align 8
store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !65
%i9 = load double, double* %"i14'ipg", align 8, !dbg !56
%i10 = fadd fast double %i9, %i2
store double %i10, double* %"i14'ipg", align 8, !dbg !56
%i11 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
%i17 = fadd fast double %i11, %i2
store double %i17, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
ret i64 %iv
invertL51.i: ; preds = %incinvertL51.i, %L51.i
%"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
%_unwrap = shl i64 %"iv'ac.0", 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i115_unwrap
%i18 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !81
%i118_unwrap = add i64 %_unwrap, 19
%"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
%i19 = load double, double* %"i119'ipg_unwrap", align 8, !dbg !79
%i20 = fadd fast double %i19, %i18
store double %i20, double* %"i119'ipg_unwrap", align 8, !dbg !79
%i21 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
%i22 = fadd fast double %i21, %i18
store double %i22, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i108_unwrap
%i23 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !81
%i111_unwrap = add i64 %_unwrap, 18
%"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
%i24 = load double, double* %"i112'ipg_unwrap", align 8, !dbg !79
%i25 = fadd fast double %i24, %i23
store double %i25, double* %"i112'ipg_unwrap", align 8, !dbg !79
%i26 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
%i27 = fadd fast double %i26, %i23
store double %i27, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i101_unwrap
%i28 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !81
%i104_unwrap = add i64 %_unwrap, 17
%"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
%i29 = load double, double* %"i105'ipg_unwrap", align 8, !dbg !79
%i30 = fadd fast double %i29, %i28
store double %i30, double* %"i105'ipg_unwrap", align 8, !dbg !79
%a19 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
%a20 = fadd fast double %a19, %i28
store double %a20, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
%"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %_unwrap
%a21 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !81
%i97_unwrap = add i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
%a22 = load double, double* %"i98'ipg_unwrap", align 8, !dbg !79
%a23 = fadd fast double %a22, %a21
store double %a23, double* %"i98'ipg_unwrap", align 8, !dbg !79
%a26 = icmp eq i64 %"iv'ac.0", 0
%a27 = add i64 %"iv'ac.0", -1
br i1 %a26, label %invertentry, label %invertL51.i
}
attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Any,Int64},
dK, dacc,N)
end
# Driver for the `grad` defined directly above: all inputs start at zero.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# Seed: only the second entry of the accumulator shadow is non-zero.
dacc[2] = 1.0
N = 16
# Print the integer returned by `grad`, then the contents of `dK` after the call.
@show grad(K, dK, acc,dacc, N)
@show dK
Does fail with address space 13:
# Reproducer variant that the surrounding report describes as failing.
# Runs the embedded LLVM module through `Base.llvmcall`, entry point `dsquare`.
#
# Arguments:
#   K, acc   - accepted but not referenced in the body
#   dK, dacc - both passed to the IR as Julia objects (`Any`); kept rooted by `GC.@preserve`
#   N        - passed as the third llvmcall argument
#
# Returns the `Int64` produced by `dsquare`.
# Inside `dsquare` the buffer pointers loaded from both `dK` and `dacc` are typed
# `double addrspace(13)*`, and `diffesq` takes both parameters in address space 13.
# NOTE(review): the IR's `dsquare` declares two parameters and hard-codes the loop size as `i64 4`,
# while three values (including `N`) are passed here — confirm how llvmcall handles the extra argument.
function grad(K, dK, acc, dacc, N)
GC.@preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"
; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0
declare void @__enzyme_autodiff(...) local_unnamed_addr
define i64 @dsquare({} addrspace(10)* %da, {} addrspace(10)* %db) local_unnamed_addr {
bb:
%"i6'ipc" = bitcast {} addrspace(10)* %da to double* addrspace(10)*
%"i7'ipc" = addrspacecast double* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
%"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16
%"i3'ipc" = bitcast {} addrspace(10)* %db to double addrspace(13)* addrspace(10)*
%"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
%"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16
%r = call i64 @diffesq(double addrspace(13)* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 4)
ret i64 %r
}
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double addrspace(13)* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
%i = call {}*** @julia.get_pgcstack() #2
%"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !38
%"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !56
br label %L51.i, !dbg !69
L51.i: ; preds = %L51.i, %entry
%iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
%iv.next = add i64 %iv, 1
%niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89
invertentry: ; preds = %invertL51.i
%i2 = load double, double addrspace(13)* %"i12'ipg", align 8
store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !65
%i9 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
%i10 = fadd fast double %i9, %i2
store double %i10, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
%i11 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
%i17 = fadd fast double %i11, %i2
store double %i17, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
ret i64 %iv
invertL51.i: ; preds = %incinvertL51.i, %L51.i
%"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
%_unwrap = shl i64 %"iv'ac.0", 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i115_unwrap
%i18 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !81
%i118_unwrap = add i64 %_unwrap, 19
%"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
%i19 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
%i20 = fadd fast double %i19, %i18
store double %i20, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
%i21 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
%i22 = fadd fast double %i21, %i18
store double %i22, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i108_unwrap
%i23 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !81
%i111_unwrap = add i64 %_unwrap, 18
%"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
%i24 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
%i25 = fadd fast double %i24, %i23
store double %i25, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
%i26 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
%i27 = fadd fast double %i26, %i23
store double %i27, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i101_unwrap
%i28 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !81
%i104_unwrap = add i64 %_unwrap, 17
%"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
%i29 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
%i30 = fadd fast double %i29, %i28
store double %i30, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
%a19 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
%a20 = fadd fast double %a19, %i28
store double %a20, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
%"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %_unwrap
%a21 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8
store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !81
%i97_unwrap = add i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
%a22 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
%a23 = fadd fast double %a22, %a21
store double %a23, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
%a26 = icmp eq i64 %"iv'ac.0", 0
%a27 = add i64 %"iv'ac.0", -1
br i1 %a26, label %invertentry, label %invertL51.i
}
attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Any,Int64},
dK, dacc,N)
end
# Driver for the `grad` defined directly above: all inputs start at zero.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# Seed: only the second entry of the accumulator shadow is non-zero.
dacc[2] = 1.0
N = 16
# Print the integer returned by `grad`, then the contents of `dK` after the call.
@show grad(K, dK, acc,dacc, N)
@show dK
I am intensely confused here @vchuravy
The incorrect runtime LLVM:
wmoses@beast:~/git/Enzyme.jl (inact_gf) $ cat out.txt
; ModuleID = 'grad'
source_filename = "grad"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @ /home/wmoses/wat.jl:2 within `grad`
define i64 @julia_grad_42({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, {}* noundef nonnull align 16 dereferenceable(40) %3, i64 signext %4) #0 {
top:
; @ /home/wmoses/wat.jl:3 within `grad`
%5 = call i64 @julia_grad_42u44({}* nonnull %1, {}* nonnull %3)
ret i64 %5
}
define nonnull {}* @jfptr_grad_43({}* %0, {}** noalias nocapture noundef readonly %1, i32 %2) #0 {
top:
%3 = load {}*, {}** %1, align 8
%4 = getelementptr inbounds {}*, {}** %1, i64 1
%5 = load {}*, {}** %4, align 8
%6 = getelementptr inbounds {}*, {}** %1, i64 2
%7 = load {}*, {}** %6, align 8
%8 = getelementptr inbounds {}*, {}** %1, i64 3
%9 = load {}*, {}** %8, align 8
%10 = getelementptr inbounds {}*, {}** %1, i64 4
%11 = bitcast {}** %10 to i64**
%12 = load i64*, i64** %11, align 8
%13 = load i64, i64* %12, align 8
%14 = call i64 @julia_grad_42({}* %3, {}* %5, {}* %7, {}* %9, i64 signext %13) #0
%15 = call nonnull {}* @ijl_box_int64(i64 signext %14)
ret {}* %15
}
declare {}* @ijl_box_int64(i64)
declare token @llvm.julia.gc_preserve_begin(...)
declare void @llvm.julia.gc_preserve_end(token)
define internal i64 @julia_grad_42u44({}* %da, {}* %db) {
bb:
%0 = bitcast {}* %da to double**
%"i8'ipl" = load double*, double** %0, align 16
%"i3'ipc" = bitcast {}* %db to double**
%"i5'ipl" = load double*, double** %"i3'ipc", align 16
%r = call i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 4)
ret i64 %r
}
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:8 within `local_sensitivity_non_mutating`
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 %loopsize) #1 {
entry:
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ essentials.jl:14 within `getindex`
%"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1
; ││ @ essentials.jl:13 within `getindex`
%"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1
; └└
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:75 within `macro expansion`
%0 = add i64 %loopsize, -1
%min.iters.check = icmp ult i64 %loopsize, 4
br i1 %min.iters.check, label %scalar.ph, label %vector.scevcheck
vector.scevcheck: ; preds = %entry
%1 = shl i64 %loopsize, 2
%2 = add i64 %1, 12
%scevgep = getelementptr double, double* %"i8'ipl", i64 %2
%mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 32)
%mul.result = extractvalue { i64, i1 } %mul, 0
%mul.overflow = extractvalue { i64, i1 } %mul, 1
%scevgep2 = bitcast double* %scevgep to i8*
%3 = sub i64 0, %mul.result
%4 = getelementptr i8, i8* %scevgep2, i64 %3
%5 = icmp ugt i8* %4, %scevgep2
%6 = add i64 %1, 13
%scevgep3 = getelementptr double, double* %"i8'ipl", i64 %6
%scevgep37 = bitcast double* %scevgep3 to i8*
%7 = getelementptr i8, i8* %scevgep37, i64 %3
%8 = icmp ugt i8* %7, %scevgep37
%9 = or i1 %8, %mul.overflow
%10 = add i64 %1, 14
%scevgep8 = getelementptr double, double* %"i8'ipl", i64 %10
%scevgep812 = bitcast double* %scevgep8 to i8*
%11 = getelementptr i8, i8* %scevgep812, i64 %3
%12 = icmp ugt i8* %11, %scevgep812
%13 = add i64 %1, 15
%scevgep13 = getelementptr double, double* %"i8'ipl", i64 %13
%scevgep1317 = bitcast double* %scevgep13 to i8*
%14 = getelementptr i8, i8* %scevgep1317, i64 %3
%15 = icmp ugt i8* %14, %scevgep1317
%16 = add i64 %1, -4
%scevgep18 = getelementptr double, double* %"i5'ipl", i64 %16
%scevgep1822 = bitcast double* %scevgep18 to i8*
%17 = getelementptr i8, i8* %scevgep1822, i64 %3
%18 = icmp ugt i8* %17, %scevgep1822
%19 = or i1 %18, %mul.overflow
%20 = add i64 %1, -3
%scevgep23 = getelementptr double, double* %"i5'ipl", i64 %20
%scevgep2327 = bitcast double* %scevgep23 to i8*
%21 = getelementptr i8, i8* %scevgep2327, i64 %3
%22 = icmp ugt i8* %21, %scevgep2327
%23 = add i64 %1, -2
%scevgep28 = getelementptr double, double* %"i5'ipl", i64 %23
%scevgep2832 = bitcast double* %scevgep28 to i8*
%24 = getelementptr i8, i8* %scevgep2832, i64 %3
%25 = icmp ugt i8* %24, %scevgep2832
%26 = add i64 %1, -1
%scevgep33 = getelementptr double, double* %"i5'ipl", i64 %26
%scevgep3337 = bitcast double* %scevgep33 to i8*
%27 = getelementptr i8, i8* %scevgep3337, i64 %3
%28 = icmp ugt i8* %27, %scevgep3337
%29 = or i1 %28, %mul.overflow
%30 = or i1 %5, %9
%31 = or i1 %12, %30
%32 = or i1 %15, %31
%33 = or i1 %32, %19
%34 = or i1 %22, %33
%35 = or i1 %25, %34
%36 = or i1 %35, %29
br i1 %36, label %scalar.ph, label %vector.memcheck
vector.memcheck: ; preds = %vector.scevcheck
%scevgep39 = getelementptr double, double* %"i5'ipl", i64 %1
%scevgep41 = getelementptr double, double* %"i8'ipl", i64 16
%37 = add i64 %1, 16
%scevgep43 = getelementptr double, double* %"i8'ipl", i64 %37
%bound0 = icmp ugt double* %scevgep43, %"i5'ipl"
%bound1 = icmp ult double* %scevgep41, %scevgep39
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %vector.memcheck
%n.vec = and i64 %loopsize, -4
%ind.end = sub i64 %0, %n.vec
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%offset.idx = sub i64 %0, %index
%38 = shl i64 %offset.idx, 2
%39 = add i64 %38, -4
%40 = add i64 %38, -8
%41 = add i64 %38, -12
%42 = or i64 %38, 3
%43 = or i64 %39, 3
%44 = or i64 %40, 3
%45 = or i64 %41, 3
%46 = getelementptr inbounds double, double* %"i5'ipl", i64 %42
%47 = getelementptr inbounds double, double* %"i5'ipl", i64 %43
%48 = getelementptr inbounds double, double* %"i5'ipl", i64 %44
%49 = getelementptr inbounds double, double* %"i5'ipl", i64 %45
%50 = load double, double* %46, align 8
%51 = load double, double* %47, align 8
%52 = load double, double* %48, align 8
%53 = load double, double* %49, align 8
; │ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
%54 = insertelement <4 x double> poison, double %50, i64 0
%55 = insertelement <4 x double> %54, double %51, i64 1
%56 = insertelement <4 x double> %55, double %52, i64 2
%57 = insertelement <4 x double> %56, double %53, i64 3
store double 0.000000e+00, double* %46, align 8
store double 0.000000e+00, double* %47, align 8
store double 0.000000e+00, double* %48, align 8
store double 0.000000e+00, double* %49, align 8
%58 = add i64 %38, 19
%59 = getelementptr inbounds double, double* %"i8'ipl", i64 -15
%60 = getelementptr inbounds double, double* %59, i64 %58
%61 = bitcast double* %60 to <16 x double>*
; │└
; │┌ @ essentials.jl:14 within `getindex`
%wide.vec = load <16 x double>, <16 x double>* %61, align 8
%strided.vec45 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
%reverse46 = shufflevector <4 x double> %strided.vec45, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%strided.vec47 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
%reverse48 = shufflevector <4 x double> %strided.vec47, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%strided.vec49 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
%reverse50 = shufflevector <4 x double> %strided.vec49, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%62 = fadd fast <4 x double> %reverse50, %57
%63 = getelementptr inbounds double, double* %46, i64 -15
%64 = bitcast double* %63 to <16 x double>*
; ││ @ essentials.jl:13 within `getindex`
%wide.vec51 = load <16 x double>, <16 x double>* %64, align 8
%strided.vec54 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
%reverse55 = shufflevector <4 x double> %strided.vec54, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%strided.vec56 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
%reverse57 = shufflevector <4 x double> %strided.vec56, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%strided.vec58 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
%reverse59 = shufflevector <4 x double> %strided.vec58, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%65 = fadd fast <4 x double> %reverse59, %57
; │└
; │┌ @ array.jl:969 within `setindex!`
%66 = shufflevector <16 x double> %wide.vec51, <16 x double> undef, <4 x i32> <i32 14, i32 10, i32 6, i32 2>
%67 = fadd fast <4 x double> %reverse48, %66
%68 = fadd fast <4 x double> %reverse57, %66
%69 = or i64 %38, 1
%70 = or i64 %39, 1
%71 = or i64 %40, 1
%72 = or i64 %41, 1
%73 = getelementptr inbounds double, double* %"i5'ipl", i64 %69
%74 = getelementptr inbounds double, double* %"i5'ipl", i64 %70
%75 = getelementptr inbounds double, double* %"i5'ipl", i64 %71
%76 = getelementptr inbounds double, double* %"i5'ipl", i64 %72
%77 = shufflevector <16 x double> %wide.vec51, <16 x double> undef, <4 x i32> <i32 13, i32 9, i32 5, i32 1>
%78 = bitcast double* %73 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %78, align 8
%79 = bitcast double* %74 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %79, align 8
%80 = bitcast double* %75 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %80, align 8
%81 = bitcast double* %76 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %81, align 8
%82 = fadd fast <4 x double> %reverse46, %77
%83 = fadd fast <4 x double> %reverse55, %77
%84 = getelementptr inbounds double, double* %"i5'ipl", i64 -12
%85 = getelementptr inbounds double, double* %84, i64 %38
%86 = bitcast double* %85 to <16 x double>*
%reverse60 = shufflevector <4 x double> %83, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%reverse61 = shufflevector <4 x double> %68, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%reverse62 = shufflevector <4 x double> %65, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%87 = shufflevector <4 x double> zeroinitializer, <4 x double> %reverse60, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%88 = shufflevector <4 x double> %reverse61, <4 x double> %reverse62, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%interleaved.vec = shufflevector <8 x double> %87, <8 x double> %88, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
store <16 x double> %interleaved.vec, <16 x double>* %86, align 8
%89 = add i64 %38, 16
%90 = getelementptr inbounds double, double* %"i8'ipl", i64 -12
%91 = fadd fast <16 x double> %wide.vec, %wide.vec51
%92 = shufflevector <16 x double> %91, <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
%93 = shufflevector <4 x double> %92, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%94 = getelementptr inbounds double, double* %90, i64 %89
%95 = bitcast double* %94 to <16 x double>*
; │└
; │┌ @ essentials.jl:14 within `getindex`
%reverse63 = shufflevector <4 x double> %93, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%reverse64 = shufflevector <4 x double> %82, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%reverse65 = shufflevector <4 x double> %67, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%reverse66 = shufflevector <4 x double> %62, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%96 = shufflevector <4 x double> %reverse63, <4 x double> %reverse64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%97 = shufflevector <4 x double> %reverse65, <4 x double> %reverse66, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%interleaved.vec67 = shufflevector <8 x double> %96, <8 x double> %97, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
store <16 x double> %interleaved.vec67, <16 x double>* %95, align 8
%index.next = add nuw i64 %index, 4
%98 = icmp eq i64 %index.next, %n.vec
br i1 %98, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %n.vec, %loopsize
br i1 %cmp.n, label %invertentry, label %scalar.ph
scalar.ph: ; preds = %middle.block, %vector.memcheck, %vector.scevcheck, %entry
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %0, %entry ], [ %0, %vector.scevcheck ], [ %0, %vector.memcheck ]
br label %invertL51.i
invertentry: ; preds = %invertL51.i, %middle.block
%i2 = load double, double* %"i12'ipg", align 8
; └└
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i12'ipg", align 8
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i9 = load double, double* %"i14'ipg", align 8
%i10 = fadd fast double %i9, %i2
store double %i10, double* %"i14'ipg", align 8
; ││ @ essentials.jl:13 within `getindex`
%i11 = load double, double* %"i12'ipg", align 8
%i17 = fadd fast double %i11, %i2
store double %i17, double* %"i12'ipg", align 8
ret i64 %0
invertL51.i: ; preds = %invertL51.i, %scalar.ph
%"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %bc.resume.val, %scalar.ph ]
%_unwrap = shl i64 %"iv'ac.0", 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
%i18 = load double, double* %"i116'ipg_unwrap", align 8
; └└
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8
%i118_unwrap = add i64 %_unwrap, 19
%"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i19 = load double, double* %"i119'ipg_unwrap", align 8
%i20 = fadd fast double %i19, %i18
store double %i20, double* %"i119'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%i21 = load double, double* %"i116'ipg_unwrap", align 8
%i22 = fadd fast double %i21, %i18
store double %i22, double* %"i116'ipg_unwrap", align 8
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
%i23 = load double, double* %"i109'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8
%i111_unwrap = add i64 %_unwrap, 18
%"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i24 = load double, double* %"i112'ipg_unwrap", align 8
%i25 = fadd fast double %i24, %i23
store double %i25, double* %"i112'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%i26 = load double, double* %"i109'ipg_unwrap", align 8
%i27 = fadd fast double %i26, %i23
store double %i27, double* %"i109'ipg_unwrap", align 8
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
%i28 = load double, double* %"i102'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8
%i104_unwrap = add i64 %_unwrap, 17
%"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i29 = load double, double* %"i105'ipg_unwrap", align 8
%i30 = fadd fast double %i29, %i28
store double %i30, double* %"i105'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%a19 = load double, double* %"i102'ipg_unwrap", align 8
%a20 = fadd fast double %a19, %i28
store double %a20, double* %"i102'ipg_unwrap", align 8
%"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
%a21 = load double, double* %"i95'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8
%i97_unwrap = add i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%a22 = load double, double* %"i98'ipg_unwrap", align 8
%a23 = fadd fast double %a22, %a21
store double %a23, double* %"i98'ipg_unwrap", align 8
%a26 = icmp eq i64 %"iv'ac.0", 0
%a27 = add i64 %"iv'ac.0", -1
br i1 %a26, label %invertentry, label %invertL51.i
; └└
}
declare void @ijl_gc_queue_root({}*)
declare void @jl_gc_queue_binding({}*)
declare {}* @ijl_gc_pool_alloc(i8*, i32, i32)
declare {}* @ijl_gc_big_alloc(i8*, i64)
declare {}* @ijl_gc_alloc_typed(i8*, i64, i8*)
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #2
attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
!llvm.module.flags = !{!0, !1, !2, !3}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"stack-protector-guard", !""}
!3 = !{i32 1, !"override-stack-alignment", i32 0}
#= /home/wmoses/wat.jl:221 =# @code_llvm(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
.text
.file "grad"
.globl julia_grad_57 # -- Begin function julia_grad_57
.p2align 4, 0x90
.type julia_grad_57,@function
julia_grad_57: # @julia_grad_57
.Lfunc_begin0:
; ┌ @ /home/wmoses/wat.jl:2 within `grad`
.cfi_startproc
# %bb.0: # %top
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movq %rsi, %rdi
; │ @ /home/wmoses/wat.jl:3 within `grad`
movabsq $julia_grad_57u59, %rax
movq %rcx, %rsi
callq *%rax
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size julia_grad_57, .Lfunc_end0-julia_grad_57
.cfi_endproc
; └
# -- End function
.section ".note.GNU-stack","",@progbits
#= /home/wmoses/wat.jl:222 =# @code_native(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
grad(K, dK, acc, dacc, N) = 3
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
The correct runtime LLVM:
; ModuleID = 'grad'
source_filename = "grad"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @ /home/wmoses/wat2.jl:2 within `grad`
define i64 @julia_grad_42({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, {}* noundef nonnull align 16 dereferenceable(40) %3, i64 signext %4) #0 {
top:
; @ /home/wmoses/wat2.jl:3 within `grad`
%5 = call i64 @julia_grad_42u44({}* nonnull %1, {}* nonnull %3)
ret i64 %5
}
define nonnull {}* @jfptr_grad_43({}* %0, {}** noalias nocapture noundef readonly %1, i32 %2) #0 {
top:
%3 = load {}*, {}** %1, align 8
%4 = getelementptr inbounds {}*, {}** %1, i64 1
%5 = load {}*, {}** %4, align 8
%6 = getelementptr inbounds {}*, {}** %1, i64 2
%7 = load {}*, {}** %6, align 8
%8 = getelementptr inbounds {}*, {}** %1, i64 3
%9 = load {}*, {}** %8, align 8
%10 = getelementptr inbounds {}*, {}** %1, i64 4
%11 = bitcast {}** %10 to i64**
%12 = load i64*, i64** %11, align 8
%13 = load i64, i64* %12, align 8
%14 = call i64 @julia_grad_42({}* %3, {}* %5, {}* %7, {}* %9, i64 signext %13) #0
%15 = call nonnull {}* @ijl_box_int64(i64 signext %14)
ret {}* %15
}
declare {}* @ijl_box_int64(i64)
declare token @llvm.julia.gc_preserve_begin(...)
declare void @llvm.julia.gc_preserve_end(token)
define internal i64 @julia_grad_42u44({}* %da, {}* %db) {
bb:
%"i6'ipc" = bitcast {}* %da to double**
%"i8'ipl" = load double*, double** %"i6'ipc", align 16
%"i3'ipc" = bitcast {}* %db to double**
%"i5'ipl" = load double*, double** %"i3'ipc", align 16
%r = call i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 4)
ret i64 %r
}
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:8 within `local_sensitivity_non_mutating`
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 %loopsize) #1 {
entry:
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ essentials.jl:14 within `getindex`
%"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1
; ││ @ essentials.jl:13 within `getindex`
%"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1
; └└
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:75 within `macro expansion`
%0 = add i64 %loopsize, -1
br label %invertL51.i
invertentry: ; preds = %invertL51.i
%i2 = load double, double* %"i12'ipg", align 8
; └
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i12'ipg", align 8
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i9 = load double, double* %"i14'ipg", align 8
%i10 = fadd fast double %i9, %i2
store double %i10, double* %"i14'ipg", align 8
; ││ @ essentials.jl:13 within `getindex`
%i11 = load double, double* %"i12'ipg", align 8
%i17 = fadd fast double %i11, %i2
store double %i17, double* %"i12'ipg", align 8
ret i64 %0
invertL51.i: ; preds = %invertL51.i, %entry
%"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %0, %entry ]
%_unwrap = shl i64 %"iv'ac.0", 2
%i115_unwrap = or i64 %_unwrap, 3
%"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
%i18 = load double, double* %"i116'ipg_unwrap", align 8
; └└
; @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8
%i118_unwrap = add i64 %_unwrap, 19
%"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i19 = load double, double* %"i119'ipg_unwrap", align 8
%i20 = fadd fast double %i19, %i18
store double %i20, double* %"i119'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%i21 = load double, double* %"i116'ipg_unwrap", align 8
%i22 = fadd fast double %i21, %i18
store double %i22, double* %"i116'ipg_unwrap", align 8
%i108_unwrap = or i64 %_unwrap, 2
%"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
%i23 = load double, double* %"i109'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8
%i111_unwrap = add i64 %_unwrap, 18
%"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i24 = load double, double* %"i112'ipg_unwrap", align 8
%i25 = fadd fast double %i24, %i23
store double %i25, double* %"i112'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%i26 = load double, double* %"i109'ipg_unwrap", align 8
%i27 = fadd fast double %i26, %i23
store double %i27, double* %"i109'ipg_unwrap", align 8
%i101_unwrap = or i64 %_unwrap, 1
%"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
%i28 = load double, double* %"i102'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8
%i104_unwrap = add i64 %_unwrap, 17
%"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%i29 = load double, double* %"i105'ipg_unwrap", align 8
%i30 = fadd fast double %i29, %i28
store double %i30, double* %"i105'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
%a19 = load double, double* %"i102'ipg_unwrap", align 8
%a20 = fadd fast double %a19, %i28
store double %a20, double* %"i102'ipg_unwrap", align 8
%"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
%a21 = load double, double* %"i95'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8
%i97_unwrap = add i64 %_unwrap, 16
%"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
%a22 = load double, double* %"i98'ipg_unwrap", align 8
%a23 = fadd fast double %a22, %a21
store double %a23, double* %"i98'ipg_unwrap", align 8
%a26 = icmp eq i64 %"iv'ac.0", 0
%a27 = add i64 %"iv'ac.0", -1
br i1 %a26, label %invertentry, label %invertL51.i
; └└
}
declare void @ijl_gc_queue_root({}*)
declare void @jl_gc_queue_binding({}*)
declare {}* @ijl_gc_pool_alloc(i8*, i32, i32)
declare {}* @ijl_gc_big_alloc(i8*, i64)
declare {}* @ijl_gc_alloc_typed(i8*, i64, i8*)
attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #1 = { mustprogress noinline willreturn }
!llvm.module.flags = !{!0, !1, !2, !3}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"stack-protector-guard", !""}
!3 = !{i32 1, !"override-stack-alignment", i32 0}
#= /home/wmoses/wat2.jl:221 =# @code_llvm(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
.text
.file "grad"
.globl julia_grad_57 # -- Begin function julia_grad_57
.p2align 4, 0x90
.type julia_grad_57,@function
julia_grad_57: # @julia_grad_57
.Lfunc_begin0:
; ┌ @ /home/wmoses/wat2.jl:2 within `grad`
.cfi_startproc
# %bb.0: # %top
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movq %rsi, %rdi
; │ @ /home/wmoses/wat2.jl:3 within `grad`
movabsq $julia_grad_57u59, %rax
movq %rcx, %rsi
callq *%rax
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size julia_grad_57, .Lfunc_end0-julia_grad_57
.cfi_endproc
; └
# -- End function
.section ".note.GNU-stack","",@progbits
#= /home/wmoses/wat2.jl:222 =# @code_native(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
grad(K, dK, acc, dacc, N) = 3
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]