Enzyme icon indicating copy to clipboard operation
Enzyme copied to clipboard

Incorrect value error

Open wsmoses opened this issue 1 year ago • 11 comments

https://fwd.gymni.ch/SH5Lj4

wsmoses avatar Jun 08 '23 01:06 wsmoses

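Adding `optnone` to the generated derivative function (`@diffesq`, attribute group `#20` in the IR below) prevents the issue; without `optnone` (i.e. using attribute group `#2` instead), the computed gradient is incorrect.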


# Reproducer entry point: runs a hand-embedded LLVM IR module through
# `Base.llvmcall`, using `dsquare` as the entry function, with return type
# `Cvoid` and argument types `Tuple{Any,Any,Any,Any,Int64}`.
#
# What the embedded module contains:
#   * `@sq`      - the primal function. It adds element 1 of `%arg`'s data
#                  buffer into element 1 of `%arg1`'s data buffer, then runs a
#                  loop (unrolled by 4) that adds `arg[i + 16]` into `arg1[i]`,
#                  calling `printf("f%d\n", i)` once per loop trip, until the
#                  counter equals `%loopsize`.
#   * `@diffesq` - the derivative of `@sq`: it replays the same forward
#                  computation and then walks the `invert*` blocks, moving
#                  values from the `arg1'` shadow buffer into the `arg'` shadow
#                  buffer. It uses attribute group `#20`, which includes
#                  `optnone`; group `#2` is the same set without `optnone` and
#                  is not referenced by any function in this module.
#   * `@dsquare` - forwards its four pointer arguments to `@diffesq` together
#                  with the constant loop bound `i64 16`.
#
# Arguments (as forwarded to `llvmcall`): `K`, `dK`, `acc`, `dacc`, `N`.
# Returns nothing (`Cvoid`); results are written into the passed arrays.
#
# NOTE(review): `@dsquare` is declared as
# `(double, {} addrspace(10)* x 4)` while the argument tuple here is
# `(Any, Any, Any, Any, Int64)`, and `@dsquare` passes a literal 16 as the loop
# bound - so `N` does not appear to be consumed by the IR. Confirm this is
# intended before relying on `N`.
function grad(K, dK, acc, dacc, N)
Base.llvmcall(("""
; ModuleID = '/tmp/compiler-explorer-compiler202358-73-l9tv0f.2f1mg/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

@.str = private unnamed_addr constant [5 x i8] c"f%d\\0A\\00", align 1

; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0

declare void @__enzyme_autodiff(...) local_unnamed_addr

define void @dsquare(double %arg, {} addrspace(10)* %a, {} addrspace(10)* %da, {} addrspace(10)* %b, {} addrspace(10)* %db) local_unnamed_addr {
bb:
  call void @diffesq({} addrspace(10)* %a, {} addrspace(10)* %da, {} addrspace(10)* %b, {} addrspace(10)* %db, i64 16)
  ret void
}

; Function Attrs: noinline
define void @sq({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg, {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg1, i64 %loopsize) #1 !dbg !6 {
entry:
  %i = call {}*** @julia.get_pgcstack()
  %i3 = bitcast {} addrspace(10)* %arg1 to double addrspace(13)* addrspace(10)*
  %i4 = addrspacecast double addrspace(13)* addrspace(10)* %i3 to double addrspace(13)* addrspace(11)*
  %i5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !14, !noalias !19, !nonnull !8
  %i6 = bitcast {} addrspace(10)* %arg to double addrspace(13)* addrspace(10)*
  %i7 = addrspacecast double addrspace(13)* addrspace(10)* %i6 to double addrspace(13)* addrspace(11)*
  %i8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i7, align 16, !tbaa !24, !invariant.load !8, !alias.scope !26, !noalias !27, !nonnull !8
  %i12 = getelementptr inbounds double, double addrspace(13)* %i5, i64 1, !dbg !28
  %i13 = load double, double addrspace(13)* %i12, align 8, !dbg !28, !tbaa !38, !alias.scope !41, !noalias !42
  %i14 = getelementptr inbounds double, double addrspace(13)* %i8, i64 1, !dbg !43
  %i15 = load double, double addrspace(13)* %i14, align 8, !dbg !43, !tbaa !38, !alias.scope !41, !noalias !42
  %i16 = fadd double %i13, %i15, !dbg !44
  store double %i16, double addrspace(13)* %i12, align 8, !dbg !47, !tbaa !38, !alias.scope !41, !noalias !50
  %i87 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !14, !noalias !19, !nonnull !8
  br label %L51.i, !dbg !51

L51.i:                                            ; preds = %L51.i, %entry
  %value_phi3.i9 = phi i64 [ 0, %entry ], [ %i122, %L51.i ]
  %a2 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %value_phi3.i9)
  %i95 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %value_phi3.i9, !dbg !53
  %i96 = load double, double addrspace(13)* %i95, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
  %i97 = add nuw i64 %value_phi3.i9, 16, !dbg !56
  %i98 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i97, !dbg !56
  %i99 = load double, double addrspace(13)* %i98, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
  %i100 = fadd double %i96, %i99, !dbg !57
  store double %i100, double addrspace(13)* %i95, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
  %i101 = or i64 %value_phi3.i9, 1, !dbg !59
  %i102 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i101, !dbg !53
  %i103 = load double, double addrspace(13)* %i102, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
  %i104 = add nuw i64 %i101, 16, !dbg !56
  %i105 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i104, !dbg !56
  %i106 = load double, double addrspace(13)* %i105, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
  %i107 = fadd double %i103, %i106, !dbg !57
  store double %i107, double addrspace(13)* %i102, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
  %i108 = or i64 %value_phi3.i9, 2, !dbg !59
  %i109 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i108, !dbg !53
  %i110 = load double, double addrspace(13)* %i109, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
  %i111 = add nuw i64 %i108, 16, !dbg !56
  %i112 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i111, !dbg !56
  %i113 = load double, double addrspace(13)* %i112, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
  %i114 = fadd double %i110, %i113, !dbg !57
  store double %i114, double addrspace(13)* %i109, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
  %i115 = or i64 %value_phi3.i9, 3, !dbg !59
  %i116 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i115, !dbg !53
  %i117 = load double, double addrspace(13)* %i116, align 8, !dbg !53, !tbaa !38, !alias.scope !41, !noalias !42
  %i118 = add nuw i64 %i115, 16, !dbg !56
  %i119 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i118, !dbg !56
  %i120 = load double, double addrspace(13)* %i119, align 8, !dbg !56, !tbaa !38, !alias.scope !41, !noalias !42
  %i121 = fadd double %i117, %i120, !dbg !57
  store double %i121, double addrspace(13)* %i116, align 8, !dbg !58, !tbaa !38, !alias.scope !41, !noalias !50
  %i122 = add nuw nsw i64 %value_phi3.i9, 4, !dbg !59
  %niter.ncmp.3.not = icmp eq i64 %i122, %loopsize, !dbg !51
  br i1 %niter.ncmp.3.not, label %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, label %L51.i, !dbg !51, !llvm.loop !63

L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L51.i
  ret void, !dbg !64
}

declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr

; Function Attrs: mustprogress noinline willreturn
define internal void @diffesq({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg, {} addrspace(10)* nocapture nofree align 16 %"arg'", {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %arg1, {} addrspace(10)* nocapture nofree align 16 %"arg1'", i64 %loopsize) #20 !dbg !65 {
entry:
  %"iv'ac" = alloca i64, align 8
  %loopLimit_cache = alloca i64, align 8
  %"i16'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i16'de", align 8
  %"i13'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i13'de", align 8
  %"i15'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i15'de", align 8
  %"i121'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i121'de", align 8
  %"i117'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i117'de", align 8
  %"i120'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i120'de", align 8
  %"i114'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i114'de", align 8
  %"i110'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i110'de", align 8
  %"i113'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i113'de", align 8
  %"i107'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i107'de", align 8
  %"i103'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i103'de", align 8
  %"i106'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i106'de", align 8
  %"i100'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i100'de", align 8
  %"i96'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i96'de", align 8
  %"i99'de" = alloca double, align 8
  store double 0.000000e+00, double* %"i99'de", align 8
  %i = call {}*** @julia.get_pgcstack() #3
  %"i3'ipc" = bitcast {} addrspace(10)* %"arg1'" to double addrspace(13)* addrspace(10)*
  %i3 = bitcast {} addrspace(10)* %arg1 to double addrspace(13)* addrspace(10)*
  %"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
  %i4 = addrspacecast double addrspace(13)* addrspace(10)* %i3 to double addrspace(13)* addrspace(11)*
  %"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16, !tbaa !9, !alias.scope !66, !noalias !71, !nonnull !8
  %i5 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !73, !noalias !74, !nonnull !8
  %"i6'ipc" = bitcast {} addrspace(10)* %"arg'" to double addrspace(13)* addrspace(10)*
  %i6 = bitcast {} addrspace(10)* %arg to double addrspace(13)* addrspace(10)*
  %"i7'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
  %i7 = addrspacecast double addrspace(13)* addrspace(10)* %i6 to double addrspace(13)* addrspace(11)*
  %"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16, !tbaa !24, !alias.scope !75, !noalias !78, !nonnull !8
  %i8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i7, align 16, !tbaa !24, !invariant.load !8, !alias.scope !80, !noalias !81, !nonnull !8
  %"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !82
  %i12 = getelementptr inbounds double, double addrspace(13)* %i5, i64 1, !dbg !82
  %i13 = load double, double addrspace(13)* %i12, align 8, !dbg !82, !tbaa !38, !alias.scope !87, !noalias !90
  %"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !92
  %i14 = getelementptr inbounds double, double addrspace(13)* %i8, i64 1, !dbg !92
  %i15 = load double, double addrspace(13)* %i14, align 8, !dbg !92, !tbaa !38, !alias.scope !93, !noalias !96
  %i16 = fadd double %i13, %i15, !dbg !98
  store double %i16, double addrspace(13)* %i12, align 8, !dbg !99, !tbaa !38, !alias.scope !87, !noalias !100
  %"i87'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16, !tbaa !9, !alias.scope !66, !noalias !71, !nonnull !8
  %i87 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %i4, align 16, !tbaa !9, !alias.scope !73, !noalias !74, !nonnull !8
  br label %L51.i, !dbg !101

L51.i:                                            ; preds = %L51.i, %entry
  %iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
  %iv.next = add nuw nsw i64 %iv, 1
  %0 = shl i64 %iv, 2
  %a2 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %0) #3
  %"i95'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %0, !dbg !103
  %i95 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %0, !dbg !103
  %i96 = load double, double addrspace(13)* %i95, align 8, !dbg !103, !tbaa !38
  %i97 = add nuw i64 %0, 16, !dbg !111
  %"i98'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97, !dbg !111
  %i98 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i97, !dbg !111
  %i99 = load double, double addrspace(13)* %i98, align 8, !dbg !111, !tbaa !38
  %i100 = fadd double %i96, %i99, !dbg !112
  store double %i100, double addrspace(13)* %i95, align 8, !dbg !113, !tbaa !38
  %i101 = or i64 %0, 1, !dbg !115
  %"i102'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i101, !dbg !103
  %i102 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i101, !dbg !103
  %i103 = load double, double addrspace(13)* %i102, align 8, !dbg !103, !tbaa !38
  %i104 = add nuw i64 %i101, 16, !dbg !111
  %"i105'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104, !dbg !111
  %i105 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i104, !dbg !111
  %i106 = load double, double addrspace(13)* %i105, align 8, !dbg !111, !tbaa !38
  %i107 = fadd double %i103, %i106, !dbg !112
  store double %i107, double addrspace(13)* %i102, align 8, !dbg !113, !tbaa !38
  %i108 = or i64 %0, 2, !dbg !115
  %"i109'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i108, !dbg !103
  %i109 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i108, !dbg !103
  %i110 = load double, double addrspace(13)* %i109, align 8, !dbg !103, !tbaa !38
  %i111 = add nuw i64 %i108, 16, !dbg !111
  %"i112'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111, !dbg !111
  %i112 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i111, !dbg !111
  %i113 = load double, double addrspace(13)* %i112, align 8, !dbg !111, !tbaa !38
  %i114 = fadd double %i110, %i113, !dbg !112
  store double %i114, double addrspace(13)* %i109, align 8, !dbg !113, !tbaa !38
  %i115 = or i64 %0, 3, !dbg !115
  %"i116'ipg" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i115, !dbg !103
  %i116 = getelementptr inbounds double, double addrspace(13)* %i87, i64 %i115, !dbg !103
  %i117 = load double, double addrspace(13)* %i116, align 8, !dbg !103, !tbaa !38
  %i118 = add nuw i64 %i115, 16, !dbg !111
  %"i119'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118, !dbg !111
  %i119 = getelementptr inbounds double, double addrspace(13)* %i8, i64 %i118, !dbg !111
  %i120 = load double, double addrspace(13)* %i119, align 8, !dbg !111, !tbaa !38
  %i121 = fadd double %i117, %i120, !dbg !112
  store double %i121, double addrspace(13)* %i116, align 8, !dbg !113, !tbaa !38
  %i122 = add nuw nsw i64 %0, 4, !dbg !115
  %niter.ncmp.3.not = icmp eq i64 %i122, %loopsize, !dbg !101
  br i1 %niter.ncmp.3.not, label %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, label %L51.i, !dbg !101, !llvm.loop !117

L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L51.i
  %1 = phi i64 [ %iv, %L51.i ], !dbg !118
  store i64 %1, i64* %loopLimit_cache, align 8, !dbg !118, !invariant.group !119
  br label %invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, !dbg !118

invertentry:                                      ; preds = %invertL51.i
  %2 = load double, double addrspace(13)* %"i12'ipg", align 8, !tbaa !38, !alias.scope !120, !noalias !121
  store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !99, !tbaa !38, !alias.scope !120, !noalias !121
  %3 = load double, double* %"i16'de", align 8
  %4 = fadd fast double %3, %2
  store double %4, double* %"i16'de", align 8
  %5 = load double, double* %"i16'de", align 8
  store double 0.000000e+00, double* %"i16'de", align 8
  %6 = load double, double* %"i13'de", align 8
  %7 = fadd fast double %6, %5
  store double %7, double* %"i13'de", align 8
  %8 = load double, double* %"i15'de", align 8
  %9 = fadd fast double %8, %5
  store double %9, double* %"i15'de", align 8
  %10 = load double, double* %"i15'de", align 8
  store double 0.000000e+00, double* %"i15'de", align 8
  %11 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !92, !tbaa !38
  %12 = fadd fast double %11, %10
  store double %12, double addrspace(13)* %"i14'ipg", align 8, !dbg !92, !tbaa !38
  %13 = load double, double* %"i13'de", align 8
  store double 0.000000e+00, double* %"i13'de", align 8
  %14 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !82, !tbaa !38
  %15 = fadd fast double %14, %13
  store double %15, double addrspace(13)* %"i12'ipg", align 8, !dbg !82, !tbaa !38
  ret void

invertL51.i:                                      ; preds = %mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa, %incinvertL51.i
  %16 = load i64, i64* %"iv'ac", align 8
  %_unwrap = shl i64 %16, 2
  %i115_unwrap = or i64 %_unwrap, 3
  %"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i115_unwrap
  %17 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !tbaa !38
  store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !113, !tbaa !38
  %18 = load double, double* %"i121'de", align 8
  %19 = fadd fast double %18, %17
  store double %19, double* %"i121'de", align 8
  %20 = load double, double* %"i121'de", align 8
  store double 0.000000e+00, double* %"i121'de", align 8
  %21 = load double, double* %"i117'de", align 8
  %22 = fadd fast double %21, %20
  store double %22, double* %"i117'de", align 8
  %23 = load double, double* %"i120'de", align 8
  %24 = fadd fast double %23, %20
  store double %24, double* %"i120'de", align 8
  %25 = load double, double* %"i120'de", align 8
  store double 0.000000e+00, double* %"i120'de", align 8
  %26 = load i64, i64* %"iv'ac", align 8
  %i118_unwrap = add nuw i64 %i115_unwrap, 16
  %"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
  %27 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %28 = fadd fast double %27, %25
  store double %28, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %29 = load double, double* %"i117'de", align 8
  store double 0.000000e+00, double* %"i117'de", align 8
  %30 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %31 = fadd fast double %30, %29
  store double %31, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %32 = load i64, i64* %"iv'ac", align 8
  %i108_unwrap = or i64 %_unwrap, 2
  %"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i108_unwrap
  %33 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !tbaa !38
  store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !113, !tbaa !38
  %34 = load double, double* %"i114'de", align 8
  %35 = fadd fast double %34, %33
  store double %35, double* %"i114'de", align 8
  %36 = load double, double* %"i114'de", align 8
  store double 0.000000e+00, double* %"i114'de", align 8
  %37 = load double, double* %"i110'de", align 8
  %38 = fadd fast double %37, %36
  store double %38, double* %"i110'de", align 8
  %39 = load double, double* %"i113'de", align 8
  %40 = fadd fast double %39, %36
  store double %40, double* %"i113'de", align 8
  %41 = load double, double* %"i113'de", align 8
  store double 0.000000e+00, double* %"i113'de", align 8
  %42 = load i64, i64* %"iv'ac", align 8
  %i111_unwrap = add nuw i64 %i108_unwrap, 16
  %"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
  %43 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %44 = fadd fast double %43, %41
  store double %44, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %45 = load double, double* %"i110'de", align 8
  store double 0.000000e+00, double* %"i110'de", align 8
  %46 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %47 = fadd fast double %46, %45
  store double %47, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %48 = load i64, i64* %"iv'ac", align 8
  %i101_unwrap = or i64 %_unwrap, 1
  %"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %i101_unwrap
  %49 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !tbaa !38
  store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !113, !tbaa !38
  %50 = load double, double* %"i107'de", align 8
  %51 = fadd fast double %50, %49
  store double %51, double* %"i107'de", align 8
  %52 = load double, double* %"i107'de", align 8
  store double 0.000000e+00, double* %"i107'de", align 8
  %53 = load double, double* %"i103'de", align 8
  %54 = fadd fast double %53, %52
  store double %54, double* %"i103'de", align 8
  %55 = load double, double* %"i106'de", align 8
  %56 = fadd fast double %55, %52
  store double %56, double* %"i106'de", align 8
  %57 = load double, double* %"i106'de", align 8
  store double 0.000000e+00, double* %"i106'de", align 8
  %58 = load i64, i64* %"iv'ac", align 8
  %i104_unwrap = add nuw i64 %i101_unwrap, 16
  %"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
  %59 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %60 = fadd fast double %59, %57
  store double %60, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %61 = load double, double* %"i103'de", align 8
  store double 0.000000e+00, double* %"i103'de", align 8
  %62 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %63 = fadd fast double %62, %61
  store double %63, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %64 = load i64, i64* %"iv'ac", align 8
  %"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i87'ipl", i64 %_unwrap
  %65 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8, !tbaa !38
  store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !113, !tbaa !38
  %66 = load double, double* %"i100'de", align 8
  %67 = fadd fast double %66, %65
  store double %67, double* %"i100'de", align 8
  %68 = load double, double* %"i100'de", align 8
  store double 0.000000e+00, double* %"i100'de", align 8
  %69 = load double, double* %"i96'de", align 8
  %70 = fadd fast double %69, %68
  store double %70, double* %"i96'de", align 8
  %71 = load double, double* %"i99'de", align 8
  %72 = fadd fast double %71, %68
  store double %72, double* %"i99'de", align 8
  %73 = load double, double* %"i99'de", align 8
  store double 0.000000e+00, double* %"i99'de", align 8
  %74 = load i64, i64* %"iv'ac", align 8
  %i97_unwrap = add nuw i64 %_unwrap, 16
  %"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
  %75 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %76 = fadd fast double %75, %73
  store double %76, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !111, !tbaa !38
  %77 = load double, double* %"i96'de", align 8
  store double 0.000000e+00, double* %"i96'de", align 8
  %78 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %79 = fadd fast double %78, %77
  store double %79, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !103, !tbaa !38
  %80 = load i64, i64* %"iv'ac", align 8
  %81 = icmp eq i64 %80, 0
  %82 = xor i1 %81, true
  br i1 %81, label %invertentry, label %incinvertL51.i

incinvertL51.i:                                   ; preds = %invertL51.i
  %83 = load i64, i64* %"iv'ac", align 8
  %84 = add nsw i64 %83, -1
  store i64 %84, i64* %"iv'ac", align 8
  br label %invertL51.i

invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa
  %85 = load i64, i64* %loopLimit_cache, align 8, !invariant.group !119
  br label %mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa

mergeinvertL51.i_L48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa: ; preds = %invertL48.i.julia_local_sensitivity_non_mutating_747_inner.exit.loopexit_crit_edge.unr-lcssa
  store i64 %85, i64* %"iv'ac", align 8
  br label %invertL51.i
}

attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { noinline }
attributes #2 = { mustprogress noinline willreturn }
attributes #20 = { mustprogress noinline optnone willreturn }
attributes #3 = { mustprogress willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17}
!15 = distinct !{!15, !16, !"na_addr13"}
!16 = distinct !{!16, !"addr13"}
!17 = !{!"jnoalias_typemd", !18}
!18 = !{!"jnoalias"}
!19 = !{!20, !21, !22, !23}
!20 = !{!"jnoalias_gcframe", !18}
!21 = !{!"jnoalias_stack", !18}
!22 = !{!"jnoalias_data", !18}
!23 = !{!"jnoalias_const", !18}
!24 = !{!25, !25, i64 0, i64 1}
!25 = !{!"jtbaa_const", !12, i64 0}
!26 = !{!15, !23}
!27 = !{!20, !21, !22, !17}
!28 = !DILocation(line: 13, scope: !29, inlinedAt: !31)
!29 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !30, file: !30, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!30 = !DIFile(filename: "essentials.jl", directory: ".")
!31 = distinct !DILocation(line: 12, scope: !32, inlinedAt: !33)
!32 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!33 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !36)
!34 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !35, file: !35, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!35 = !DIFile(filename: "simdloop.jl", directory: ".")
!36 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !37)
!37 = distinct !DILocation(line: 0, scope: !6)
!38 = !{!39, !39, i64 0}
!39 = !{!"jtbaa_arraybuf", !40, i64 0}
!40 = !{!"jtbaa_data", !12, i64 0}
!41 = !{!22}
!42 = !{!20, !21, !17, !23}
!43 = !DILocation(line: 14, scope: !29, inlinedAt: !31)
!44 = !DILocation(line: 408, scope: !45, inlinedAt: !31)
!45 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !46, file: !46, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!46 = !DIFile(filename: "float.jl", directory: ".")
!47 = !DILocation(line: 969, scope: !48, inlinedAt: !31)
!48 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !49, file: !49, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!49 = !DIFile(filename: "array.jl", directory: ".")
!50 = !{!15, !20, !21, !17, !23}
!51 = !DILocation(line: 75, scope: !34, inlinedAt: !52)
!52 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !37)
!53 = !DILocation(line: 13, scope: !29, inlinedAt: !54)
!54 = distinct !DILocation(line: 15, scope: !32, inlinedAt: !55)
!55 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !52)
!56 = !DILocation(line: 14, scope: !29, inlinedAt: !54)
!57 = !DILocation(line: 408, scope: !45, inlinedAt: !54)
!58 = !DILocation(line: 969, scope: !48, inlinedAt: !54)
!59 = !DILocation(line: 87, scope: !60, inlinedAt: !62)
!60 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !61, file: !61, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!61 = !DIFile(filename: "int.jl", directory: ".")
!62 = distinct !DILocation(line: 78, scope: !34, inlinedAt: !52)
!63 = distinct !{!63}
!64 = !DILocation(line: 0, scope: !6)
!65 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!66 = !{!67, !69, !17}
!67 = distinct !{!67, !68, !"shadow_0"}
!68 = distinct !{!68, !" diff: %arg1"}
!69 = distinct !{!69, !70, !"na_addr13"}
!70 = distinct !{!70, !"addr13"}
!71 = !{!72, !20, !21, !22, !23}
!72 = distinct !{!72, !68, !"primal"}
!73 = !{!72, !69, !17}
!74 = !{!67, !20, !21, !22, !23}
!75 = !{!76, !69, !23}
!76 = distinct !{!76, !77, !"shadow_0"}
!77 = distinct !{!77, !" diff: %arg"}
!78 = !{!79, !20, !21, !22, !17}
!79 = distinct !{!79, !77, !"primal"}
!80 = !{!79, !69, !23}
!81 = !{!76, !20, !21, !22, !17}
!82 = !DILocation(line: 13, scope: !29, inlinedAt: !83)
!83 = distinct !DILocation(line: 12, scope: !32, inlinedAt: !84)
!84 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !85)
!85 = distinct !DILocation(line: 11, scope: !65, inlinedAt: !86)
!86 = distinct !DILocation(line: 0, scope: !65)
!87 = !{!88, !22}
!88 = distinct !{!88, !89, !"primal"}
!89 = distinct !{!89, !" diff: %i5"}
!90 = !{!91, !20, !21, !17, !23}
!91 = distinct !{!91, !89, !"shadow_0"}
!92 = !DILocation(line: 14, scope: !29, inlinedAt: !83)
!93 = !{!94, !22}
!94 = distinct !{!94, !95, !"primal"}
!95 = distinct !{!95, !" diff: %i8"}
!96 = !{!97, !20, !21, !17, !23}
!97 = distinct !{!97, !95, !"shadow_0"}
!98 = !DILocation(line: 408, scope: !45, inlinedAt: !83)
!99 = !DILocation(line: 969, scope: !48, inlinedAt: !83)
!100 = !{!91, !69, !20, !21, !17, !23}
!101 = !DILocation(line: 75, scope: !34, inlinedAt: !102)
!102 = distinct !DILocation(line: 14, scope: !65, inlinedAt: !86)
!103 = !DILocation(line: 13, scope: !29, inlinedAt: !104)
!104 = distinct !DILocation(line: 15, scope: !32, inlinedAt: !105)
!105 = distinct !DILocation(line: 77, scope: !34, inlinedAt: !102)
!106 = !{!107, !22}
!107 = distinct !{!107, !108, !"primal"}
!108 = distinct !{!108, !" diff: %i87"}
!109 = !{!110, !20, !21, !17, !23}
!110 = distinct !{!110, !108, !"shadow_0"}
!111 = !DILocation(line: 14, scope: !29, inlinedAt: !104)
!112 = !DILocation(line: 408, scope: !45, inlinedAt: !104)
!113 = !DILocation(line: 969, scope: !48, inlinedAt: !104)
!114 = !{!110, !69, !20, !21, !17, !23}
!115 = !DILocation(line: 87, scope: !60, inlinedAt: !116)
!116 = distinct !DILocation(line: 78, scope: !34, inlinedAt: !102)
!117 = distinct !{!117}
!118 = !DILocation(line: 0, scope: !65)
!119 = distinct !{}
!120 = !{!91, !22}
!121 = !{!88, !69, !20, !21, !17, !23}
!122 = !{!97, !22}
!123 = !{!94, !20, !21, !17, !23}
!124 = !{!88, !20, !21, !17, !23}
!125 = !{!110, !22}
!126 = !{!107, !69, !20, !21, !17, !23}
!127 = !{!107, !20, !21, !17, !23}
""","dsquare"), Cvoid, Tuple{Any,Any,Any,Any,Int64},
K,dK, acc,dacc,N)
end

# Driver for the reproducer: build zero-filled inputs, seed one entry of
# `dacc`, run `grad`, and print the resulting `dK`.

# 16x16 matrix and length-16 vector passed as the first and third arguments.
K, acc = zeros(16, 16), zeros(16)

# Companion buffers of matching shapes passed as the second and fourth arguments.
dK, dacc = zeros(16, 16), zeros(16)

# Seed: only the second element of `dacc` is nonzero.
dacc[2] = 1.0

N = 16
grad(K, dK, acc, dacc, N)
@show dK

wsmoses avatar Jun 08 '23 01:06 wsmoses

Intended output (the nonzero entries of dK are all 1.0):

f0
f4
f8
f12
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]

Bad output:

f0
f4
f8
f12
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]

wsmoses avatar Jun 08 '23 01:06 wsmoses

Running only mem2reg keeps the bug, whereas O1 makes it disappear (the input here is the post-mem2reg IR): https://godbolt.org/z/MMTefhjj1

wsmoses avatar Jun 08 '23 01:06 wsmoses

The output of that Godbolt link (after additionally running instcombine and simplifycfg) still produces the error.

wsmoses avatar Jun 08 '23 01:06 wsmoses

Original Julia code:

using LinearAlgebra
using Enzyme
using EnzymeTestUtils

Enzyme.API.printall!(true)


# Minimal kernel used as the differentiation target.
#
# Adds the first 16 entries of column 1 of `K` to `acc`, then adds the first
# `N` entries of column 2 of `K` to `acc`. Despite the name, `acc` is updated
# in place; the function returns `nothing`.
#
# Arguments:
#   K   - matrix indexed as K[i,1] and K[i,2]
#   acc - vector accumulated into (mutated)
#   N   - trip count of the second loop (the first loop is fixed at 16)
#
# Bounds checks are disabled via @inbounds, so the caller must ensure `acc`
# and the first two columns of `K` have at least max(16, N) rows.
function local_sensitivity_non_mutating(K, acc, N)

    # acc = summm(K)
        # First loop: constant trip count of 16.
        @inbounds @simd for i in 1:16
	  acc[i] += K[i,1]
        end
        # Second loop: trip count taken from the runtime argument N.
        @inbounds @simd for i in 1:N
	  acc[i] += K[i,2]
        end
    return
end

# differentiate sum(coupled_springs(K, m, x0, v0, Ktmp, xtmp, vtmp; dt = 0.001, T = 1.0)) w.r.t. K
# NOTE(review): no `coupled_springs` is defined in this snippet; the comment
# above looks like a leftover from a larger example - confirm.
# Global problem sizes and inputs for the reproducer.
N = 16
M = 16
K = ones(N, M)
m = 0.5 .+ 0.5 * rand(N)
x0 = ones(M)
v0 = zeros(M)

# Run reverse-mode Enzyme on `f` and print the resulting shadow of `K`.
#
# Arguments:
#   f  - function to differentiate; called by Enzyme as f(K, acc, N)
#   K  - primal matrix; a zero-initialised shadow `dK` of the same shape is
#        allocated here
#   m, x0, v0, T - accepted for signature compatibility but not used by the
#        active code path (only by the commented-out thunk variant below)
#
# Reads the global `N` for the length of `acc`/`dacc` and as the constant
# third argument to `f`. The adjoint seed is a single 1.0 in dacc[2].
#
# Returns `nothing`; the gradient is only reported via `@show dK`.
function enzyme_gradient(f::F, K, m, x0, v0, T) where F
    dK = zero(K)
    # Fixed: the original called `Mzeros(N)`, which is not defined and threw
    # an UndefVarError before any differentiation happened.
    acc = zeros(N)
    dacc = zeros(N)
    dacc[2] = 1.0

    Enzyme.autodiff(Reverse, Const(f), Const, Duplicated(K, dK), Duplicated(acc, dacc), Const(N))
    @show dK
    nothing
    # forward, pullback = Enzyme.autodiff_thunk(
    #     ReverseSplitModified(ReverseSplitWithPrimal, Val((false, false, false, false, false, false))),
    #     Const{F}, Active{Float64},
    #     Duplicated{typeof(K)}, Duplicated{typeof(m)}, Duplicated{typeof(x0)}, Duplicated{typeof(v0)}, Const{typeof(T)})

    # tape, primal, shadow = forward(Const(f), Duplicated(K, dK), Duplicated(m, dm), Duplicated(x0, dx0), Duplicated(v0, dv0), Const(T))
    # dret = pullback(Const(f), Duplicated(K, dK), Duplicated(m, dm), Duplicated(x0, dx0), Duplicated(v0, dv0), Const(T), one(eltype(K)), tape)
    # return dK
end
# enzyme_gradient ends with `nothing`, so dK1 is `nothing`; the gradient is
# reported by the `@show dK` inside the function.
dK1 = enzyme_gradient(local_sensitivity_non_mutating, K, m, x0, v0, 0.001)

wsmoses avatar Jun 08 '23 01:06 wsmoses

Not erroring:


# Hand-reduced reproducer: runs the embedded LLVM module through
# Base.llvmcall with entry point "dsquare" and returns its Int64 result.
#
# Only `dK`, `pointer(dacc)` and `N` are forwarded to the IR; `K` and `acc`
# are accepted but unused. `dacc` is handed over as a raw Ptr{Float64}
# (converted with inttoptr inside @dsquare), and the call is wrapped in
# GC.preserve for `dK` and `dacc`.
#
# Inside the IR, @dsquare calls @diffesq with a constant loop size of 4 and
# returns its result.
#
# NOTE(review): @dsquare is defined with two parameters, while the llvmcall
# signature below passes three (Any, Ptr{Float64}, Int64) - confirm this is
# the exact text that was run.
function grad(K, dK, acc, dacc, N)
 @GC.preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0

declare void @__enzyme_autodiff(...) local_unnamed_addr

define i64 @dsquare({} addrspace(10)* %da, i64 %db) local_unnamed_addr {
bb:
  %ddb = inttoptr i64 %db to double*
  %r = call i64 @diffesq({} addrspace(10)* %da, double* %ddb, i64 4)
  ret i64 %r
}

declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr

; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq({} addrspace(10)* nocapture nofree align 16 %"arg'", double* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
  %i = call {}*** @julia.get_pgcstack() #2
  %"i6'ipc" = bitcast {} addrspace(10)* %"arg'" to double addrspace(13)* addrspace(10)*
  %"i7'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
  %"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16
  %"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1, !dbg !38
  %"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !56
  br label %L51.i, !dbg !69

L51.i:                                            ; preds = %L51.i, %entry
  %iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
  %iv.next = add i64 %iv, 1
  %niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
  br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89

invertentry:                                      ; preds = %invertL51.i
  %i2 = load double, double* %"i12'ipg", align 8
  store double 0.000000e+00, double* %"i12'ipg", align 8, !dbg !65
  %i9 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
  %i10 = fadd fast double %i9, %i2
  store double %i10, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
  %i11 = load double, double* %"i12'ipg", align 8, !dbg !38
  %i17 = fadd fast double %i11, %i2
  store double %i17, double* %"i12'ipg", align 8, !dbg !38
  ret i64 %iv

invertL51.i:                                      ; preds = %incinvertL51.i, %L51.i
  %"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
  %_unwrap = shl i64 %"iv'ac.0", 2
  %i115_unwrap = or i64 %_unwrap, 3
  %"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
  %i18 = load double, double* %"i116'ipg_unwrap", align 8
  store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8, !dbg !81
  %i118_unwrap = add i64 %_unwrap, 19
  %"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
  %i19 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
  %i20 = fadd fast double %i19, %i18
  store double %i20, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
  %i21 = load double, double* %"i116'ipg_unwrap", align 8, !dbg !71
  %i22 = fadd fast double %i21, %i18
  store double %i22, double* %"i116'ipg_unwrap", align 8, !dbg !71
  %i108_unwrap = or i64 %_unwrap, 2
  %"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
  %i23 = load double, double* %"i109'ipg_unwrap", align 8
  store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8, !dbg !81
  %i111_unwrap = add i64 %_unwrap, 18
  %"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
  %i24 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
  %i25 = fadd fast double %i24, %i23
  store double %i25, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
  %i26 = load double, double* %"i109'ipg_unwrap", align 8, !dbg !71
  %i27 = fadd fast double %i26, %i23
  store double %i27, double* %"i109'ipg_unwrap", align 8, !dbg !71
  %i101_unwrap = or i64 %_unwrap, 1
  %"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
  %i28 = load double, double* %"i102'ipg_unwrap", align 8
  store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8, !dbg !81
  %i104_unwrap = add i64 %_unwrap, 17
  %"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
  %i29 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
  %i30 = fadd fast double %i29, %i28
  store double %i30, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
  %a19 = load double, double* %"i102'ipg_unwrap", align 8, !dbg !71
  %a20 = fadd fast double %a19, %i28
  store double %a20, double* %"i102'ipg_unwrap", align 8, !dbg !71
  %"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
  %a21 = load double, double* %"i95'ipg_unwrap", align 8
  store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8, !dbg !81
  %i97_unwrap = add i64 %_unwrap, 16
  %"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
  %a22 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
  %a23 = fadd fast double %a22, %a21
  store double %a23, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
  %a26 = icmp eq i64 %"iv'ac.0", 0
  %a27 = add i64 %"iv'ac.0", -1
  br i1 %a26, label %invertentry, label %invertL51.i
}

attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Ptr{Float64},Int64},
dK, pointer(dacc),N)
end

# Driver for the raw-pointer variant of grad: zero-initialised buffers with a
# single unit seed in dacc[2]; prints grad's Int64 return value and then dK.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# The only nonzero input to the call below.
dacc[2] = 1.0
N = 16
@show grad(K, dK, acc,dacc, N)
@show dK

wsmoses avatar Jun 08 '23 02:06 wsmoses

Does not fail when using address space 10:


# Hand-reduced reproducer: runs the embedded LLVM module through
# Base.llvmcall with entry point "dsquare" and returns its Int64 result.
#
# Only `dK`, `dacc` and `N` are forwarded to the IR, both arrays as `Any`
# (addrspace(10) objects); `K` and `acc` are accepted but unused. The call is
# wrapped in GC.@preserve for `dK` and `dacc`.
#
# Inside @dsquare, the first word of %da is loaded as a plain `double*` and
# the first word of %db as a `double addrspace(13)*`; both are then passed to
# @diffesq with a constant loop size of 4.
#
# NOTE(review): @dsquare is defined with two parameters, while the llvmcall
# signature below passes three (Any, Any, Int64) - confirm this is the exact
# text that was run.
function grad(K, dK, acc, dacc, N)
 GC.@preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0

declare void @__enzyme_autodiff(...) local_unnamed_addr

define i64 @dsquare({} addrspace(10)* %da, {} addrspace(10)* %db) local_unnamed_addr {
bb:
  %"i6'ipc" = bitcast {} addrspace(10)* %da to double* addrspace(10)*
  %"i7'ipc" = addrspacecast double* addrspace(10)* %"i6'ipc" to double* addrspace(11)*
  %"i8'ipl" = load double*, double* addrspace(11)* %"i7'ipc", align 16
  %"i3'ipc" = bitcast {} addrspace(10)* %db to double addrspace(13)* addrspace(10)*
  %"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
  %"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16
  %r = call i64 @diffesq(double* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 4)
  ret i64 %r
}

declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr

; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
  %i = call {}*** @julia.get_pgcstack() #2
  %"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !38
  %"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1, !dbg !56
  br label %L51.i, !dbg !69

L51.i:                                            ; preds = %L51.i, %entry
  %iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
  %iv.next = add i64 %iv, 1
  %niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
  br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89

invertentry:                                      ; preds = %invertL51.i
  %i2 = load double, double addrspace(13)* %"i12'ipg", align 8
  store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !65
  %i9 = load double, double* %"i14'ipg", align 8, !dbg !56
  %i10 = fadd fast double %i9, %i2
  store double %i10, double* %"i14'ipg", align 8, !dbg !56
  %i11 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
  %i17 = fadd fast double %i11, %i2
  store double %i17, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
  ret i64 %iv

invertL51.i:                                      ; preds = %incinvertL51.i, %L51.i
  %"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
  %_unwrap = shl i64 %"iv'ac.0", 2
  %i115_unwrap = or i64 %_unwrap, 3
  %"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i115_unwrap
  %i18 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !81
  %i118_unwrap = add i64 %_unwrap, 19
  %"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
  %i19 = load double, double* %"i119'ipg_unwrap", align 8, !dbg !79
  %i20 = fadd fast double %i19, %i18
  store double %i20, double* %"i119'ipg_unwrap", align 8, !dbg !79
  %i21 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
  %i22 = fadd fast double %i21, %i18
  store double %i22, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
  %i108_unwrap = or i64 %_unwrap, 2
  %"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i108_unwrap
  %i23 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !81
  %i111_unwrap = add i64 %_unwrap, 18
  %"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
  %i24 = load double, double* %"i112'ipg_unwrap", align 8, !dbg !79
  %i25 = fadd fast double %i24, %i23
  store double %i25, double* %"i112'ipg_unwrap", align 8, !dbg !79
  %i26 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
  %i27 = fadd fast double %i26, %i23
  store double %i27, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
  %i101_unwrap = or i64 %_unwrap, 1
  %"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i101_unwrap
  %i28 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !81
  %i104_unwrap = add i64 %_unwrap, 17
  %"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
  %i29 = load double, double* %"i105'ipg_unwrap", align 8, !dbg !79
  %i30 = fadd fast double %i29, %i28
  store double %i30, double* %"i105'ipg_unwrap", align 8, !dbg !79
  %a19 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
  %a20 = fadd fast double %a19, %i28
  store double %a20, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
  %"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %_unwrap
  %a21 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !81
  %i97_unwrap = add i64 %_unwrap, 16
  %"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
  %a22 = load double, double* %"i98'ipg_unwrap", align 8, !dbg !79
  %a23 = fadd fast double %a22, %a21
  store double %a23, double* %"i98'ipg_unwrap", align 8, !dbg !79
  %a26 = icmp eq i64 %"iv'ac.0", 0
  %a27 = add i64 %"iv'ac.0", -1
  br i1 %a26, label %invertentry, label %invertL51.i
}

attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Any,Int64},
dK, dacc,N)
end

# Driver for the variant of grad above: zero-initialised buffers with a
# single unit seed in dacc[2]; prints grad's Int64 return value and then dK.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# The only nonzero input to the call below.
dacc[2] = 1.0
N = 16
@show grad(K, dK, acc,dacc, N)
@show dK

wsmoses avatar Jun 08 '23 02:06 wsmoses

Does fail with address space 13:


function grad(K, dK, acc, dacc, N)
 GC.@preserve dK dacc Base.llvmcall(("""
; ModuleID = '/app/example.ll'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() local_unnamed_addr #0

declare void @__enzyme_autodiff(...) local_unnamed_addr

; @dsquare: entry point named in the enclosing llvmcall.
;   %da, %db - object pointers in addrspace(10).
; Loads a double addrspace(13)* from offset 0 of each object and passes both
; loaded pointers to @diffesq with a constant trip count of 4. Returns the i64
; produced by @diffesq unchanged.
; NOTE(review): presumably offset 0 holds the Julia array data pointer - confirm.
; NOTE(review): the enclosing llvmcall declares Tuple{Any,Any,Int64} and passes
; three values, while this definition takes two parameters - confirm intended.
define i64 @dsquare({} addrspace(10)* %da, {} addrspace(10)* %db) local_unnamed_addr {
bb:
  ; %da path: note the bitcast target is double* in addrspace(10), unlike the
  ; %db path below, which bitcasts to double addrspace(13)* in addrspace(10).
  %"i6'ipc" = bitcast {} addrspace(10)* %da to double* addrspace(10)*
  %"i7'ipc" = addrspacecast double* addrspace(10)* %"i6'ipc" to double addrspace(13)* addrspace(11)*
  %"i8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i7'ipc", align 16
  ; %db path: same cast-then-load sequence.
  %"i3'ipc" = bitcast {} addrspace(10)* %db to double addrspace(13)* addrspace(10)*
  %"i4'ipc" = addrspacecast double addrspace(13)* addrspace(10)* %"i3'ipc" to double addrspace(13)* addrspace(11)*
  %"i5'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"i4'ipc", align 16
  ; Buffer loaded from %da is the first argument, buffer loaded from %db the second.
  %r = call i64 @diffesq(double addrspace(13)* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 4)
  ret i64 %r
}

declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr

; Function Attrs: mustprogress noinline willreturn
; @diffesq: scalar form of the routine called from @dsquare.
;   %i8'ipl, %i5'ipl - double buffers in addrspace(13); both are read and written.
;   %loopsize        - trip count of the forward counting loop.
; Returns the final value of %iv, which is %loopsize - 1 when the forward loop
; exits through its equality test.
; Block layout:
;   entry       - computes pointers to element 1 of each buffer.
;   L51.i       - forward count only, no memory accesses.
;   invertL51.i - counts back down to 0, updating four elements per pass.
;   invertentry - one final update on element 1 of each buffer, then return.
define internal i64 @diffesq(double addrspace(13)* %"i8'ipl", double addrspace(13)* %"i5'ipl", i64 %loopsize) #1 !dbg !6 {
entry:
  %i = call {}*** @julia.get_pgcstack() #2
  ; Pointers to element 1 of each buffer; they are only dereferenced in invertentry.
  %"i12'ipg" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 1, !dbg !38
  %"i14'ipg" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 1, !dbg !56
  br label %L51.i, !dbg !69

L51.i:                                            ; preds = %L51.i, %entry
  ; Pure induction: increments %iv until %iv.next equals %loopsize.
  %iv = phi i64 [ %iv.next, %L51.i ], [ 0, %entry ]
  %iv.next = add i64 %iv, 1
  %niter.ncmp.3.not = icmp eq i64 %iv.next, %loopsize, !dbg !69
  br i1 %niter.ncmp.3.not, label %invertL51.i, label %L51.i, !dbg !69, !llvm.loop !89

invertentry:                                      ; preds = %invertL51.i
  ; Same pattern as lanes 1-3 of the loop below, applied to element 1:
  ; v = i5[1]; i5[1] = 0; i8[1] += v; i5[1] = (reload of i5[1]) + v.
  ; NOTE(review): if the two buffers do not overlap, the reload reads the 0.0
  ; just stored, so i5[1] ends up equal to v again - confirm.
  %i2 = load double, double addrspace(13)* %"i12'ipg", align 8
  store double 0.000000e+00, double addrspace(13)* %"i12'ipg", align 8, !dbg !65
  %i9 = load double, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
  %i10 = fadd fast double %i9, %i2
  store double %i10, double addrspace(13)* %"i14'ipg", align 8, !dbg !56
  %i11 = load double, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
  %i17 = fadd fast double %i11, %i2
  store double %i17, double addrspace(13)* %"i12'ipg", align 8, !dbg !38
  ret i64 %iv

invertL51.i:                                      ; preds = %invertL51.i, %L51.i
  ; Reverse counter: starts at the last forward %iv and is decremented once per
  ; pass; the loop leaves for invertentry after the pass in which it is 0.
  %"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %iv, %L51.i ]
  ; %_unwrap = 4 * counter. Its two low bits are zero, so each or with 1, 2 or 3
  ; below yields the same value as an add.
  %_unwrap = shl i64 %"iv'ac.0", 2
  ; Lane 3: v = i5[4c+3]; i5[4c+3] = 0; i8[4c+19] += v; i5[4c+3] = reload + v.
  %i115_unwrap = or i64 %_unwrap, 3
  %"i116'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i115_unwrap
  %i18 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !81
  %i118_unwrap = add i64 %_unwrap, 19
  %"i119'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i118_unwrap
  %i19 = load double, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
  %i20 = fadd fast double %i19, %i18
  store double %i20, double addrspace(13)* %"i119'ipg_unwrap", align 8, !dbg !79
  ; This reload comes after the zero store to the same address above.
  %i21 = load double, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
  %i22 = fadd fast double %i21, %i18
  store double %i22, double addrspace(13)* %"i116'ipg_unwrap", align 8, !dbg !71
  ; Lane 2: same sequence on i5[4c+2] and i8[4c+18].
  %i108_unwrap = or i64 %_unwrap, 2
  %"i109'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i108_unwrap
  %i23 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !81
  %i111_unwrap = add i64 %_unwrap, 18
  %"i112'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i111_unwrap
  %i24 = load double, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
  %i25 = fadd fast double %i24, %i23
  store double %i25, double addrspace(13)* %"i112'ipg_unwrap", align 8, !dbg !79
  %i26 = load double, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
  %i27 = fadd fast double %i26, %i23
  store double %i27, double addrspace(13)* %"i109'ipg_unwrap", align 8, !dbg !71
  ; Lane 1: same sequence on i5[4c+1] and i8[4c+17].
  %i101_unwrap = or i64 %_unwrap, 1
  %"i102'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %i101_unwrap
  %i28 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !81
  %i104_unwrap = add i64 %_unwrap, 17
  %"i105'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i104_unwrap
  %i29 = load double, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
  %i30 = fadd fast double %i29, %i28
  store double %i30, double addrspace(13)* %"i105'ipg_unwrap", align 8, !dbg !79
  %a19 = load double, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
  %a20 = fadd fast double %a19, %i28
  store double %a20, double addrspace(13)* %"i102'ipg_unwrap", align 8, !dbg !71
  ; Lane 0: v = i5[4c]; i5[4c] = 0; i8[4c+16] += v. Unlike lanes 1-3 there is
  ; no reload-and-add write-back to i5 here.
  %"i95'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i5'ipl", i64 %_unwrap
  %a21 = load double, double addrspace(13)* %"i95'ipg_unwrap", align 8
  store double 0.000000e+00, double addrspace(13)* %"i95'ipg_unwrap", align 8, !dbg !81
  %i97_unwrap = add i64 %_unwrap, 16
  %"i98'ipg_unwrap" = getelementptr inbounds double, double addrspace(13)* %"i8'ipl", i64 %i97_unwrap
  %a22 = load double, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
  %a23 = fadd fast double %a22, %a21
  store double %a23, double addrspace(13)* %"i98'ipg_unwrap", align 8, !dbg !79
  ; Exit test uses the counter value of this pass; the decrement feeds the phi.
  %a26 = icmp eq i64 %"iv'ac.0", 0
  %a27 = add i64 %"iv'ac.0", -1
  br i1 %a26, label %invertentry, label %invertL51.i
}

attributes #0 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33487" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { mustprogress willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2, !4}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!3 = !DIFile(filename: "abstractarray.jl", directory: ".")
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl", directory: ".")
!6 = distinct !DISubprogram(name: "local_sensitivity_non_mutating", linkageName: "julia_local_sensitivity_non_mutating_747", scope: null, file: !5, line: 8, type: !7, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!7 = !DISubroutineType(types: !8)
!8 = !{}
!9 = !{!10, !10, i64 0}
!10 = !{!"jtbaa_arrayptr", !11, i64 0}
!11 = !{!"jtbaa_array", !12, i64 0}
!12 = !{!"jtbaa", !13, i64 0}
!13 = !{!"jtbaa"}
!14 = !{!15, !17, !19}
!15 = distinct !{!15, !16, !"shadow_0"}
!16 = distinct !{!16, !" diff: %arg1"}
!17 = distinct !{!17, !18, !"na_addr13"}
!18 = distinct !{!18, !"addr13"}
!19 = !{!"jnoalias_typemd", !20}
!20 = !{!"jnoalias"}
!21 = !{!22, !23, !24, !25, !26}
!22 = distinct !{!22, !16, !"primal"}
!23 = !{!"jnoalias_gcframe", !20}
!24 = !{!"jnoalias_stack", !20}
!25 = !{!"jnoalias_data", !20}
!26 = !{!"jnoalias_const", !20}
!27 = !{!22, !17, !19}
!28 = !{!15, !23, !24, !25, !26}
!29 = !{!30, !30, i64 0, i64 1}
!30 = !{!"jtbaa_const", !12, i64 0}
!31 = !{!32, !17, !26}
!32 = distinct !{!32, !33, !"shadow_0"}
!33 = distinct !{!33, !" diff: %arg"}
!34 = !{!35, !23, !24, !25, !19}
!35 = distinct !{!35, !33, !"primal"}
!36 = !{!35, !17, !26}
!37 = !{!32, !23, !24, !25, !19}
!38 = !DILocation(line: 13, scope: !39, inlinedAt: !41)
!39 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !40, file: !40, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!40 = !DIFile(filename: "essentials.jl", directory: ".")
!41 = distinct !DILocation(line: 12, scope: !42, inlinedAt: !43)
!42 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !5, file: !5, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!43 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !46)
!44 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !45, file: !45, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!45 = !DIFile(filename: "simdloop.jl", directory: ".")
!46 = distinct !DILocation(line: 11, scope: !6, inlinedAt: !47)
!47 = distinct !DILocation(line: 0, scope: !6)
!48 = !{!49, !49, i64 0}
!49 = !{!"jtbaa_arraybuf", !50, i64 0}
!50 = !{!"jtbaa_data", !12, i64 0}
!51 = !{!52, !25}
!52 = distinct !{!52, !53, !"primal"}
!53 = distinct !{!53, !" diff: %i5"}
!54 = !{!55, !23, !24, !19, !26}
!55 = distinct !{!55, !53, !"shadow_0"}
!56 = !DILocation(line: 14, scope: !39, inlinedAt: !41)
!57 = !{!58, !25}
!58 = distinct !{!58, !59, !"primal"}
!59 = distinct !{!59, !" diff: %i8"}
!60 = !{!61, !23, !24, !19, !26}
!61 = distinct !{!61, !59, !"shadow_0"}
!62 = !DILocation(line: 408, scope: !63, inlinedAt: !41)
!63 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !64, file: !64, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!64 = !DIFile(filename: "float.jl", directory: ".")
!65 = !DILocation(line: 969, scope: !66, inlinedAt: !41)
!66 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !67, file: !67, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!55, !17, !23, !24, !19, !26}
!69 = !DILocation(line: 75, scope: !44, inlinedAt: !70)
!70 = distinct !DILocation(line: 14, scope: !6, inlinedAt: !47)
!71 = !DILocation(line: 13, scope: !39, inlinedAt: !72)
!72 = distinct !DILocation(line: 15, scope: !42, inlinedAt: !73)
!73 = distinct !DILocation(line: 77, scope: !44, inlinedAt: !70)
!74 = !{!75, !25}
!75 = distinct !{!75, !76, !"primal"}
!76 = distinct !{!76, !" diff: %i87"}
!77 = !{!78, !23, !24, !19, !26}
!78 = distinct !{!78, !76, !"shadow_0"}
!79 = !DILocation(line: 14, scope: !39, inlinedAt: !72)
!80 = !DILocation(line: 408, scope: !63, inlinedAt: !72)
!81 = !DILocation(line: 969, scope: !66, inlinedAt: !72)
!82 = !{!78, !17, !23, !24, !19, !26}
!83 = !DILocation(line: 87, scope: !84, inlinedAt: !86)
!84 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !85, file: !85, type: !7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !8)
!85 = !DIFile(filename: "int.jl", directory: ".")
!86 = distinct !DILocation(line: 78, scope: !44, inlinedAt: !70)
!87 = !{!75}
!88 = !{!78}
!89 = distinct !{!89}
!90 = !{!55, !25}
!91 = !{!52, !17, !23, !24, !19, !26}
!92 = !{!61, !25}
!93 = !{!58, !23, !24, !19, !26}
!94 = !{!52, !23, !24, !19, !26}
!95 = !{!78, !25}
!96 = !{!75, !23, !24, !19, !26}
!97 = !{!75, !17, !23, !24, !19, !26}
""","dsquare"), Int64, Tuple{Any,Any,Int64},
dK, dacc,N)
end

# Driver for the reproducer above: build zero-initialised inputs, seed one
# entry of `dacc`, call `grad`, and print the result and the updated `dK`.
K = zeros(16, 16)
dK = zeros(16, 16)
acc = zeros(16)
dacc = zeros(16)
# Seed: the only non-zero input value (second element, 1-based indexing).
# NOTE(review): presumably this is the element that @diffesq addresses with
# its `i64 1` offsets into the buffer loaded from `dacc` - confirm.
dacc[2] = 1.0
N = 16
# The llvmcall inside `grad` is given only `dK`, `dacc` and `N` as arguments;
# `K` and `acc` are accepted by `grad` but are not forwarded.
@show grad(K, dK, acc,dacc, N)
@show dK

wsmoses avatar Jun 08 '23 02:06 wsmoses

I am intensely confused here @vchuravy

wsmoses avatar Jun 08 '23 02:06 wsmoses

The incorrect runtime LLVM:

wmoses@beast:~/git/Enzyme.jl (inact_gf) $ cat out.txt 
; ModuleID = 'grad'
source_filename = "grad"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;  @ /home/wmoses/wat.jl:2 within `grad`
define i64 @julia_grad_42({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, {}* noundef nonnull align 16 dereferenceable(40) %3, i64 signext %4) #0 {
top:
;  @ /home/wmoses/wat.jl:3 within `grad`
  %5 = call i64 @julia_grad_42u44({}* nonnull %1, {}* nonnull %3)
  ret i64 %5
}

define nonnull {}* @jfptr_grad_43({}* %0, {}** noalias nocapture noundef readonly %1, i32 %2) #0 {
top:
  %3 = load {}*, {}** %1, align 8
  %4 = getelementptr inbounds {}*, {}** %1, i64 1
  %5 = load {}*, {}** %4, align 8
  %6 = getelementptr inbounds {}*, {}** %1, i64 2
  %7 = load {}*, {}** %6, align 8
  %8 = getelementptr inbounds {}*, {}** %1, i64 3
  %9 = load {}*, {}** %8, align 8
  %10 = getelementptr inbounds {}*, {}** %1, i64 4
  %11 = bitcast {}** %10 to i64**
  %12 = load i64*, i64** %11, align 8
  %13 = load i64, i64* %12, align 8
  %14 = call i64 @julia_grad_42({}* %3, {}* %5, {}* %7, {}* %9, i64 signext %13) #0
  %15 = call nonnull {}* @ijl_box_int64(i64 signext %14)
  ret {}* %15
}

declare {}* @ijl_box_int64(i64)

declare token @llvm.julia.gc_preserve_begin(...)

declare void @llvm.julia.gc_preserve_end(token)

define internal i64 @julia_grad_42u44({}* %da, {}* %db) {
bb:
  %0 = bitcast {}* %da to double**
  %"i8'ipl" = load double*, double** %0, align 16
  %"i3'ipc" = bitcast {}* %db to double**
  %"i5'ipl" = load double*, double** %"i3'ipc", align 16
  %r = call i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 4)
  ret i64 %r
}

;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:8 within `local_sensitivity_non_mutating`
; Function Attrs: mustprogress noinline willreturn
define internal i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 %loopsize) #1 {
entry:
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ essentials.jl:14 within `getindex`
    %"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1
; ││ @ essentials.jl:13 within `getindex`
    %"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1
; └└
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:75 within `macro expansion`
   %0 = add i64 %loopsize, -1
   %min.iters.check = icmp ult i64 %loopsize, 4
   br i1 %min.iters.check, label %scalar.ph, label %vector.scevcheck

vector.scevcheck:                                 ; preds = %entry
   %1 = shl i64 %loopsize, 2
   %2 = add i64 %1, 12
   %scevgep = getelementptr double, double* %"i8'ipl", i64 %2
   %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 32)
   %mul.result = extractvalue { i64, i1 } %mul, 0
   %mul.overflow = extractvalue { i64, i1 } %mul, 1
   %scevgep2 = bitcast double* %scevgep to i8*
   %3 = sub i64 0, %mul.result
   %4 = getelementptr i8, i8* %scevgep2, i64 %3
   %5 = icmp ugt i8* %4, %scevgep2
   %6 = add i64 %1, 13
   %scevgep3 = getelementptr double, double* %"i8'ipl", i64 %6
   %scevgep37 = bitcast double* %scevgep3 to i8*
   %7 = getelementptr i8, i8* %scevgep37, i64 %3
   %8 = icmp ugt i8* %7, %scevgep37
   %9 = or i1 %8, %mul.overflow
   %10 = add i64 %1, 14
   %scevgep8 = getelementptr double, double* %"i8'ipl", i64 %10
   %scevgep812 = bitcast double* %scevgep8 to i8*
   %11 = getelementptr i8, i8* %scevgep812, i64 %3
   %12 = icmp ugt i8* %11, %scevgep812
   %13 = add i64 %1, 15
   %scevgep13 = getelementptr double, double* %"i8'ipl", i64 %13
   %scevgep1317 = bitcast double* %scevgep13 to i8*
   %14 = getelementptr i8, i8* %scevgep1317, i64 %3
   %15 = icmp ugt i8* %14, %scevgep1317
   %16 = add i64 %1, -4
   %scevgep18 = getelementptr double, double* %"i5'ipl", i64 %16
   %scevgep1822 = bitcast double* %scevgep18 to i8*
   %17 = getelementptr i8, i8* %scevgep1822, i64 %3
   %18 = icmp ugt i8* %17, %scevgep1822
   %19 = or i1 %18, %mul.overflow
   %20 = add i64 %1, -3
   %scevgep23 = getelementptr double, double* %"i5'ipl", i64 %20
   %scevgep2327 = bitcast double* %scevgep23 to i8*
   %21 = getelementptr i8, i8* %scevgep2327, i64 %3
   %22 = icmp ugt i8* %21, %scevgep2327
   %23 = add i64 %1, -2
   %scevgep28 = getelementptr double, double* %"i5'ipl", i64 %23
   %scevgep2832 = bitcast double* %scevgep28 to i8*
   %24 = getelementptr i8, i8* %scevgep2832, i64 %3
   %25 = icmp ugt i8* %24, %scevgep2832
   %26 = add i64 %1, -1
   %scevgep33 = getelementptr double, double* %"i5'ipl", i64 %26
   %scevgep3337 = bitcast double* %scevgep33 to i8*
   %27 = getelementptr i8, i8* %scevgep3337, i64 %3
   %28 = icmp ugt i8* %27, %scevgep3337
   %29 = or i1 %28, %mul.overflow
   %30 = or i1 %5, %9
   %31 = or i1 %12, %30
   %32 = or i1 %15, %31
   %33 = or i1 %32, %19
   %34 = or i1 %22, %33
   %35 = or i1 %25, %34
   %36 = or i1 %35, %29
   br i1 %36, label %scalar.ph, label %vector.memcheck

vector.memcheck:                                  ; preds = %vector.scevcheck
   %scevgep39 = getelementptr double, double* %"i5'ipl", i64 %1
   %scevgep41 = getelementptr double, double* %"i8'ipl", i64 16
   %37 = add i64 %1, 16
   %scevgep43 = getelementptr double, double* %"i8'ipl", i64 %37
   %bound0 = icmp ugt double* %scevgep43, %"i5'ipl"
   %bound1 = icmp ult double* %scevgep41, %scevgep39
   %found.conflict = and i1 %bound0, %bound1
   br i1 %found.conflict, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
   %n.vec = and i64 %loopsize, -4
   %ind.end = sub i64 %0, %n.vec
   br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %offset.idx = sub i64 %0, %index
   %38 = shl i64 %offset.idx, 2
   %39 = add i64 %38, -4
   %40 = add i64 %38, -8
   %41 = add i64 %38, -12
   %42 = or i64 %38, 3
   %43 = or i64 %39, 3
   %44 = or i64 %40, 3
   %45 = or i64 %41, 3
   %46 = getelementptr inbounds double, double* %"i5'ipl", i64 %42
   %47 = getelementptr inbounds double, double* %"i5'ipl", i64 %43
   %48 = getelementptr inbounds double, double* %"i5'ipl", i64 %44
   %49 = getelementptr inbounds double, double* %"i5'ipl", i64 %45
   %50 = load double, double* %46, align 8
   %51 = load double, double* %47, align 8
   %52 = load double, double* %48, align 8
   %53 = load double, double* %49, align 8
; │ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
    %54 = insertelement <4 x double> poison, double %50, i64 0
    %55 = insertelement <4 x double> %54, double %51, i64 1
    %56 = insertelement <4 x double> %55, double %52, i64 2
    %57 = insertelement <4 x double> %56, double %53, i64 3
    store double 0.000000e+00, double* %46, align 8
    store double 0.000000e+00, double* %47, align 8
    store double 0.000000e+00, double* %48, align 8
    store double 0.000000e+00, double* %49, align 8
    %58 = add i64 %38, 19
    %59 = getelementptr inbounds double, double* %"i8'ipl", i64 -15
    %60 = getelementptr inbounds double, double* %59, i64 %58
    %61 = bitcast double* %60 to <16 x double>*
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %wide.vec = load <16 x double>, <16 x double>* %61, align 8
    %strided.vec45 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
    %reverse46 = shufflevector <4 x double> %strided.vec45, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %strided.vec47 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
    %reverse48 = shufflevector <4 x double> %strided.vec47, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %strided.vec49 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
    %reverse50 = shufflevector <4 x double> %strided.vec49, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %62 = fadd fast <4 x double> %reverse50, %57
    %63 = getelementptr inbounds double, double* %46, i64 -15
    %64 = bitcast double* %63 to <16 x double>*
; ││ @ essentials.jl:13 within `getindex`
    %wide.vec51 = load <16 x double>, <16 x double>* %64, align 8
    %strided.vec54 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
    %reverse55 = shufflevector <4 x double> %strided.vec54, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %strided.vec56 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
    %reverse57 = shufflevector <4 x double> %strided.vec56, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %strided.vec58 = shufflevector <16 x double> %wide.vec51, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
    %reverse59 = shufflevector <4 x double> %strided.vec58, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %65 = fadd fast <4 x double> %reverse59, %57
; │└
; │┌ @ array.jl:969 within `setindex!`
    %66 = shufflevector <16 x double> %wide.vec51, <16 x double> undef, <4 x i32> <i32 14, i32 10, i32 6, i32 2>
    %67 = fadd fast <4 x double> %reverse48, %66
    %68 = fadd fast <4 x double> %reverse57, %66
    %69 = or i64 %38, 1
    %70 = or i64 %39, 1
    %71 = or i64 %40, 1
    %72 = or i64 %41, 1
    %73 = getelementptr inbounds double, double* %"i5'ipl", i64 %69
    %74 = getelementptr inbounds double, double* %"i5'ipl", i64 %70
    %75 = getelementptr inbounds double, double* %"i5'ipl", i64 %71
    %76 = getelementptr inbounds double, double* %"i5'ipl", i64 %72
    %77 = shufflevector <16 x double> %wide.vec51, <16 x double> undef, <4 x i32> <i32 13, i32 9, i32 5, i32 1>
    %78 = bitcast double* %73 to <2 x double>*
    store <2 x double> zeroinitializer, <2 x double>* %78, align 8
    %79 = bitcast double* %74 to <2 x double>*
    store <2 x double> zeroinitializer, <2 x double>* %79, align 8
    %80 = bitcast double* %75 to <2 x double>*
    store <2 x double> zeroinitializer, <2 x double>* %80, align 8
    %81 = bitcast double* %76 to <2 x double>*
    store <2 x double> zeroinitializer, <2 x double>* %81, align 8
    %82 = fadd fast <4 x double> %reverse46, %77
    %83 = fadd fast <4 x double> %reverse55, %77
    %84 = getelementptr inbounds double, double* %"i5'ipl", i64 -12
    %85 = getelementptr inbounds double, double* %84, i64 %38
    %86 = bitcast double* %85 to <16 x double>*
    %reverse60 = shufflevector <4 x double> %83, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %reverse61 = shufflevector <4 x double> %68, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %reverse62 = shufflevector <4 x double> %65, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %87 = shufflevector <4 x double> zeroinitializer, <4 x double> %reverse60, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    %88 = shufflevector <4 x double> %reverse61, <4 x double> %reverse62, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    %interleaved.vec = shufflevector <8 x double> %87, <8 x double> %88, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
    store <16 x double> %interleaved.vec, <16 x double>* %86, align 8
    %89 = add i64 %38, 16
    %90 = getelementptr inbounds double, double* %"i8'ipl", i64 -12
    %91 = fadd fast <16 x double> %wide.vec, %wide.vec51
    %92 = shufflevector <16 x double> %91, <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
    %93 = shufflevector <4 x double> %92, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %94 = getelementptr inbounds double, double* %90, i64 %89
    %95 = bitcast double* %94 to <16 x double>*
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %reverse63 = shufflevector <4 x double> %93, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %reverse64 = shufflevector <4 x double> %82, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %reverse65 = shufflevector <4 x double> %67, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %reverse66 = shufflevector <4 x double> %62, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %96 = shufflevector <4 x double> %reverse63, <4 x double> %reverse64, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    %97 = shufflevector <4 x double> %reverse65, <4 x double> %reverse66, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    %interleaved.vec67 = shufflevector <8 x double> %96, <8 x double> %97, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
    store <16 x double> %interleaved.vec67, <16 x double>* %95, align 8
    %index.next = add nuw i64 %index, 4
    %98 = icmp eq i64 %index.next, %n.vec
    br i1 %98, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
    %cmp.n = icmp eq i64 %n.vec, %loopsize
    br i1 %cmp.n, label %invertentry, label %scalar.ph

scalar.ph:                                        ; preds = %middle.block, %vector.memcheck, %vector.scevcheck, %entry
    %bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %0, %entry ], [ %0, %vector.scevcheck ], [ %0, %vector.memcheck ]
    br label %invertL51.i

invertentry:                                      ; preds = %invertL51.i, %middle.block
    %i2 = load double, double* %"i12'ipg", align 8
; └└
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i12'ipg", align 8
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i9 = load double, double* %"i14'ipg", align 8
    %i10 = fadd fast double %i9, %i2
    store double %i10, double* %"i14'ipg", align 8
; ││ @ essentials.jl:13 within `getindex`
    %i11 = load double, double* %"i12'ipg", align 8
    %i17 = fadd fast double %i11, %i2
    store double %i17, double* %"i12'ipg", align 8
    ret i64 %0

invertL51.i:                                      ; preds = %invertL51.i, %scalar.ph
    %"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %bc.resume.val, %scalar.ph ]
    %_unwrap = shl i64 %"iv'ac.0", 2
    %i115_unwrap = or i64 %_unwrap, 3
    %"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
    %i18 = load double, double* %"i116'ipg_unwrap", align 8
; └└
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8
    %i118_unwrap = add i64 %_unwrap, 19
    %"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i19 = load double, double* %"i119'ipg_unwrap", align 8
    %i20 = fadd fast double %i19, %i18
    store double %i20, double* %"i119'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %i21 = load double, double* %"i116'ipg_unwrap", align 8
    %i22 = fadd fast double %i21, %i18
    store double %i22, double* %"i116'ipg_unwrap", align 8
    %i108_unwrap = or i64 %_unwrap, 2
    %"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
    %i23 = load double, double* %"i109'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8
    %i111_unwrap = add i64 %_unwrap, 18
    %"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i24 = load double, double* %"i112'ipg_unwrap", align 8
    %i25 = fadd fast double %i24, %i23
    store double %i25, double* %"i112'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %i26 = load double, double* %"i109'ipg_unwrap", align 8
    %i27 = fadd fast double %i26, %i23
    store double %i27, double* %"i109'ipg_unwrap", align 8
    %i101_unwrap = or i64 %_unwrap, 1
    %"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
    %i28 = load double, double* %"i102'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8
    %i104_unwrap = add i64 %_unwrap, 17
    %"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i29 = load double, double* %"i105'ipg_unwrap", align 8
    %i30 = fadd fast double %i29, %i28
    store double %i30, double* %"i105'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %a19 = load double, double* %"i102'ipg_unwrap", align 8
    %a20 = fadd fast double %a19, %i28
    store double %a20, double* %"i102'ipg_unwrap", align 8
    %"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
    %a21 = load double, double* %"i95'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8
    %i97_unwrap = add i64 %_unwrap, 16
    %"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %a22 = load double, double* %"i98'ipg_unwrap", align 8
    %a23 = fadd fast double %a22, %a21
    store double %a23, double* %"i98'ipg_unwrap", align 8
    %a26 = icmp eq i64 %"iv'ac.0", 0
    %a27 = add i64 %"iv'ac.0", -1
    br i1 %a26, label %invertentry, label %invertL51.i
; └└
}

declare void @ijl_gc_queue_root({}*)

declare void @jl_gc_queue_binding({}*)

declare {}* @ijl_gc_pool_alloc(i8*, i32, i32)

declare {}* @ijl_gc_big_alloc(i8*, i64)

declare {}* @ijl_gc_alloc_typed(i8*, i64, i8*)

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #2

attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #1 = { mustprogress noinline willreturn }
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }

!llvm.module.flags = !{!0, !1, !2, !3}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"stack-protector-guard", !""}
!3 = !{i32 1, !"override-stack-alignment", i32 0}
#= /home/wmoses/wat.jl:221 =# @code_llvm(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
	.text
	.file	"grad"
	.globl	julia_grad_57                   # -- Begin function julia_grad_57
	.p2align	4, 0x90
	.type	julia_grad_57,@function
# julia_grad_57: sets up a frame pointer, moves %rsi into %rdi and %rcx into
# %rsi, calls julia_grad_57u59 indirectly through %rax, then returns without
# touching %rax after the call (so the callee's return value is passed through).
# NOTE(review): assuming the System V x86-64 calling convention, this forwards
# the 2nd and 4th integer arguments as the callee's 1st and 2nd -- confirm.
julia_grad_57:                          # @julia_grad_57
.Lfunc_begin0:
; ┌ @ /home/wmoses/wat.jl:2 within `grad`
	.cfi_startproc
# %bb.0:                                # %top
# Standard frame-pointer prologue.
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
# Incoming %rsi becomes the callee's %rdi.
	movq	%rsi, %rdi
; │ @ /home/wmoses/wat.jl:3 within `grad`
# Load the absolute address of the callee; the call below is indirect via %rax.
	movabsq	$julia_grad_57u59, %rax
# Incoming %rcx becomes the callee's %rsi.
	movq	%rcx, %rsi
	callq	*%rax
# Epilogue: restore the caller's frame pointer and return.
	popq	%rbp
	.cfi_def_cfa %rsp, 8
	retq
.Lfunc_end0:
	.size	julia_grad_57, .Lfunc_end0-julia_grad_57
	.cfi_endproc
; └
                                        # -- End function
	.section	".note.GNU-stack","",@progbits
#= /home/wmoses/wat.jl:222 =# @code_native(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
grad(K, dK, acc, dacc, N) = 3
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]

wsmoses avatar Jun 08 '23 02:06 wsmoses

The correct runtime LLVM IR:

; ModuleID = 'grad'
source_filename = "grad"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;  @ /home/wmoses/wat2.jl:2 within `grad`
; Entry point for `grad`: accepts four object pointers and an i64, but only the
; second (%1) and fourth (%3) pointers are forwarded to @julia_grad_42u44.
; %0, %2 and %4 are not used in this body. Returns the callee's i64 result.
define i64 @julia_grad_42({}* noundef nonnull align 16 dereferenceable(40) %0, {}* noundef nonnull align 16 dereferenceable(40) %1, {}* noundef nonnull align 16 dereferenceable(40) %2, {}* noundef nonnull align 16 dereferenceable(40) %3, i64 signext %4) #0 {
top:
;  @ /home/wmoses/wat2.jl:3 within `grad`
  %5 = call i64 @julia_grad_42u44({}* nonnull %1, {}* nonnull %3)
  ret i64 %5
}

; Wrapper around @julia_grad_42: reads five entries from the pointer array %1,
; unpacks the fifth as an i64, calls @julia_grad_42, and returns the result
; boxed via @ijl_box_int64. %0 and %2 are not used in this body.
; NOTE(review): presumably Julia's generic-call adapter (function object,
; argument array, argument count) -- confirm against the Julia calling-convention docs.
define nonnull {}* @jfptr_grad_43({}* %0, {}** noalias nocapture noundef readonly %1, i32 %2) #0 {
top:
  ; Entries 0..3 of the array are passed through as opaque object pointers.
  %3 = load {}*, {}** %1, align 8
  %4 = getelementptr inbounds {}*, {}** %1, i64 1
  %5 = load {}*, {}** %4, align 8
  %6 = getelementptr inbounds {}*, {}** %1, i64 2
  %7 = load {}*, {}** %6, align 8
  %8 = getelementptr inbounds {}*, {}** %1, i64 3
  %9 = load {}*, {}** %8, align 8
  ; Entry 4 is reinterpreted as a pointer to i64 and dereferenced to get the integer argument.
  %10 = getelementptr inbounds {}*, {}** %1, i64 4
  %11 = bitcast {}** %10 to i64**
  %12 = load i64*, i64** %11, align 8
  %13 = load i64, i64* %12, align 8
  %14 = call i64 @julia_grad_42({}* %3, {}* %5, {}* %7, {}* %9, i64 signext %13) #0
  ; Box the raw i64 result before returning it as an object pointer.
  %15 = call nonnull {}* @ijl_box_int64(i64 signext %14)
  ret {}* %15
}

declare {}* @ijl_box_int64(i64)

declare token @llvm.julia.gc_preserve_begin(...)

declare void @llvm.julia.gc_preserve_end(token)

; Loads a double* from the first word of each of %da and %db and calls @diffesq
; on the two pointers with a fixed loop size of 4, returning its i64 result.
; NOTE(review): presumably the first word is the data pointer of a Julia Array -- confirm.
define internal i64 @julia_grad_42u44({}* %da, {}* %db) {
bb:
  %"i6'ipc" = bitcast {}* %da to double**
  %"i8'ipl" = load double*, double** %"i6'ipc", align 16
  %"i3'ipc" = bitcast {}* %db to double**
  %"i5'ipl" = load double*, double** %"i3'ipc", align 16
  ; %da's buffer is passed first, %db's second; the loop size is the constant 4.
  %r = call i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 4)
  ret i64 %r
}

;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:8 within `local_sensitivity_non_mutating`
; Function Attrs: mustprogress noinline willreturn
; Iterates a counter from %loopsize - 1 down to 0. For each counter value it
; processes four consecutive doubles of the %"i5'ipl" buffer (indices 4*iv .. 4*iv+3):
; each value is read, its slot is zeroed, and the value is accumulated into the
; %"i8'ipl" buffer 16 elements further on. For offsets 1..3 the value is then added
; back into the %"i5'ipl" slot; for offset 0 it is not. After the loop, the same
; read/zero/accumulate/add-back sequence runs once on element 1 of both buffers.
; Returns %loopsize - 1.
; NOTE(review): the 'ipl / 'ipg / invert* names suggest an Enzyme-generated
; reverse pass over shadow buffers -- confirm against the generating pass.
define internal i64 @diffesq(double* %"i8'ipl", double* %"i5'ipl", i64 %loopsize) #1 {
entry:
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ essentials.jl:14 within `getindex`
    ; Pointers to element 1 of each buffer; only used in %invertentry.
    %"i14'ipg" = getelementptr inbounds double, double* %"i8'ipl", i64 1
; ││ @ essentials.jl:13 within `getindex`
    %"i12'ipg" = getelementptr inbounds double, double* %"i5'ipl", i64 1
; └└
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:75 within `macro expansion`
   ; Initial counter value and also the function's return value.
   %0 = add i64 %loopsize, -1
   br label %invertL51.i

invertentry:                                      ; preds = %invertL51.i
   ; Save element 1 of the %"i5'ipl" buffer before it is zeroed below.
   %i2 = load double, double* %"i12'ipg", align 8
; └
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:11
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:12
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i12'ipg", align 8
; │└
; │┌ @ essentials.jl:14 within `getindex`
    ; Accumulate the saved value into element 1 of the %"i8'ipl" buffer.
    %i9 = load double, double* %"i14'ipg", align 8
    %i10 = fadd fast double %i9, %i2
    store double %i10, double* %"i14'ipg", align 8
; ││ @ essentials.jl:13 within `getindex`
    ; Add the saved value back onto the current contents of the zeroed slot.
    %i11 = load double, double* %"i12'ipg", align 8
    %i17 = fadd fast double %i11, %i2
    store double %i17, double* %"i12'ipg", align 8
    ret i64 %0

invertL51.i:                                      ; preds = %invertL51.i, %entry
    ; Down-counting induction variable: starts at %loopsize - 1, decremented via %a27.
    %"iv'ac.0" = phi i64 [ %a27, %invertL51.i ], [ %0, %entry ]
    ; Base index of this block: 4 * iv. The low two bits are zero, so `or` with 1..3 equals adding 1..3.
    %_unwrap = shl i64 %"iv'ac.0", 2
    ; Offset 3: read i5[base+3], zero it, accumulate into i8[base+19], then add the value back to i5[base+3].
    %i115_unwrap = or i64 %_unwrap, 3
    %"i116'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i115_unwrap
    %i18 = load double, double* %"i116'ipg_unwrap", align 8
; └└
;  @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl within `local_sensitivity_non_mutating` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:14
; ┌ @ simdloop.jl:77 within `macro expansion` @ /home/vchuravy/src/ICMLBLAS/julia_experiments/coupled_springs.jl:15
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i116'ipg_unwrap", align 8
    %i118_unwrap = add i64 %_unwrap, 19
    %"i119'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i118_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i19 = load double, double* %"i119'ipg_unwrap", align 8
    %i20 = fadd fast double %i19, %i18
    store double %i20, double* %"i119'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %i21 = load double, double* %"i116'ipg_unwrap", align 8
    %i22 = fadd fast double %i21, %i18
    store double %i22, double* %"i116'ipg_unwrap", align 8
    ; Offset 2: same sequence on i5[base+2] and i8[base+18].
    %i108_unwrap = or i64 %_unwrap, 2
    %"i109'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i108_unwrap
    %i23 = load double, double* %"i109'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i109'ipg_unwrap", align 8
    %i111_unwrap = add i64 %_unwrap, 18
    %"i112'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i111_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i24 = load double, double* %"i112'ipg_unwrap", align 8
    %i25 = fadd fast double %i24, %i23
    store double %i25, double* %"i112'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %i26 = load double, double* %"i109'ipg_unwrap", align 8
    %i27 = fadd fast double %i26, %i23
    store double %i27, double* %"i109'ipg_unwrap", align 8
    ; Offset 1: same sequence on i5[base+1] and i8[base+17].
    %i101_unwrap = or i64 %_unwrap, 1
    %"i102'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %i101_unwrap
    %i28 = load double, double* %"i102'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i102'ipg_unwrap", align 8
    %i104_unwrap = add i64 %_unwrap, 17
    %"i105'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i104_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %i29 = load double, double* %"i105'ipg_unwrap", align 8
    %i30 = fadd fast double %i29, %i28
    store double %i30, double* %"i105'ipg_unwrap", align 8
; ││ @ essentials.jl:13 within `getindex`
    %a19 = load double, double* %"i102'ipg_unwrap", align 8
    %a20 = fadd fast double %a19, %i28
    store double %a20, double* %"i102'ipg_unwrap", align 8
    ; Offset 0: read i5[base], zero it, accumulate into i8[base+16]. Unlike offsets 1..3, nothing is added back to i5[base].
    %"i95'ipg_unwrap" = getelementptr inbounds double, double* %"i5'ipl", i64 %_unwrap
    %a21 = load double, double* %"i95'ipg_unwrap", align 8
; │└
; │┌ @ array.jl:969 within `setindex!`
    store double 0.000000e+00, double* %"i95'ipg_unwrap", align 8
    %i97_unwrap = add i64 %_unwrap, 16
    %"i98'ipg_unwrap" = getelementptr inbounds double, double* %"i8'ipl", i64 %i97_unwrap
; │└
; │┌ @ essentials.jl:14 within `getindex`
    %a22 = load double, double* %"i98'ipg_unwrap", align 8
    %a23 = fadd fast double %a22, %a21
    store double %a23, double* %"i98'ipg_unwrap", align 8
    ; The exit test uses the pre-decrement counter, so the iv == 0 block is processed before leaving the loop.
    %a26 = icmp eq i64 %"iv'ac.0", 0
    %a27 = add i64 %"iv'ac.0", -1
    br i1 %a26, label %invertentry, label %invertL51.i
; └└
}

declare void @ijl_gc_queue_root({}*)

declare void @jl_gc_queue_binding({}*)

declare {}* @ijl_gc_pool_alloc(i8*, i32, i32)

declare {}* @ijl_gc_big_alloc(i8*, i64)

declare {}* @ijl_gc_alloc_typed(i8*, i64, i8*)

attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #1 = { mustprogress noinline willreturn }

!llvm.module.flags = !{!0, !1, !2, !3}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"stack-protector-guard", !""}
!3 = !{i32 1, !"override-stack-alignment", i32 0}
#= /home/wmoses/wat2.jl:221 =# @code_llvm(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
	.text
	.file	"grad"
	.globl	julia_grad_57                   # -- Begin function julia_grad_57
	.p2align	4, 0x90
	.type	julia_grad_57,@function
# julia_grad_57: sets up a frame pointer, moves %rsi into %rdi and %rcx into
# %rsi, calls julia_grad_57u59 indirectly through %rax, then returns without
# touching %rax after the call (so the callee's return value is passed through).
# NOTE(review): assuming the System V x86-64 calling convention, this forwards
# the 2nd and 4th integer arguments as the callee's 1st and 2nd -- confirm.
julia_grad_57:                          # @julia_grad_57
.Lfunc_begin0:
; ┌ @ /home/wmoses/wat2.jl:2 within `grad`
	.cfi_startproc
# %bb.0:                                # %top
# Standard frame-pointer prologue.
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
# Incoming %rsi becomes the callee's %rdi.
	movq	%rsi, %rdi
; │ @ /home/wmoses/wat2.jl:3 within `grad`
# Load the absolute address of the callee; the call below is indirect via %rax.
	movabsq	$julia_grad_57u59, %rax
# Incoming %rcx becomes the callee's %rsi.
	movq	%rcx, %rsi
	callq	*%rax
# Epilogue: restore the caller's frame pointer and return.
	popq	%rbp
	.cfi_def_cfa %rsp, 8
	retq
.Lfunc_end0:
	.size	julia_grad_57, .Lfunc_end0-julia_grad_57
	.cfi_endproc
; └
                                        # -- End function
	.section	".note.GNU-stack","",@progbits
#= /home/wmoses/wat2.jl:222 =# @code_native(dump_module = true, grad(K, dK, acc, dacc, N)) = nothing
grad(K, dK, acc, dacc, N) = 3
dK = [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]

wsmoses avatar Jun 08 '23 02:06 wsmoses