julia icon indicating copy to clipboard operation
julia copied to clipboard

Type stabilize hvcat_fill!

Open wsmoses opened this issue 2 years ago • 1 comments

Presently hvcat_fill is type unstable if the input tuple does not contain values of the same type.

This changes the iteration index to be of the tuple itself (permitting unrolling) and therefore type stabilization.

It would be really nice to have this backported to 1.10 before release (and other Julia's too, if possible).

Before this change it would behave like this:

julia> @code_llvm Base.hvcat_fill!(Matrix{Float64}(undef, 2, 2), (((1, 2.0, 3, 4.0)))
       )
;  @ abstractarray.jl:2126 within `hvcat_fill!`
define nonnull {}* @"julia_hvcat_fill!_625"({}* noundef nonnull align 16 dereferenceable(40) %0, { i64, double, i64, double }* nocapture noundef nonnull readonly align 8 dereferenceable(32) %1) #0 {
top:
  %2 = alloca [4 x {}*], align 8
  %gcframe57 = alloca [3 x {}*], align 16
  %gcframe57.sub = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 0
  %3 = bitcast [3 x {}*]* %gcframe57 to i8*
  call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 true)
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #13
  %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
  %4 = bitcast i8* %tls_ppgcstack to {}****
  %tls_pgcstack = load {}***, {}**** %4, align 8
;  @ abstractarray.jl:2127 within `hvcat_fill!`
; ┌ @ array.jl:190 within `size`
   %5 = bitcast [3 x {}*]* %gcframe57 to i64*
   store i64 4, i64* %5, align 16
   %6 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 1
   %7 = bitcast {}** %6 to {}***
   %8 = load {}**, {}*** %tls_pgcstack, align 8
   store {}** %8, {}*** %7, align 8
   %9 = bitcast {}*** %tls_pgcstack to {}***
   store {}** %gcframe57.sub, {}*** %9, align 8
   %10 = bitcast {}* %0 to {}**
   %arraysize_ptr = getelementptr inbounds {}*, {}** %10, i64 3
   %11 = bitcast {}** %arraysize_ptr to i64*
   %arraysize = load i64, i64* %11, align 8
   %arraysize_ptr1 = getelementptr inbounds {}*, {}** %10, i64 4
   %12 = bitcast {}** %arraysize_ptr1 to i64*
   %arraysize2 = load i64, i64* %12, align 8
; └
;  @ abstractarray.jl:2129 within `hvcat_fill!`
; ┌ @ int.jl:88 within `*`
   %13 = mul i64 %arraysize2, %arraysize
; └
; ┌ @ operators.jl:276 within `!=`
; │┌ @ promotion.jl:521 within `==`
    %.not = icmp eq i64 %13, 4
; └└
  br i1 %.not, label %L12, label %L7

L7:                                               ; preds = %top
  %.sub = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 0
;  @ abstractarray.jl:2130 within `hvcat_fill!`
; ┌ @ strings/io.jl:189 within `string`
   %ptls_field58 = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
   %14 = bitcast {}*** %ptls_field58 to i8**
   %ptls_load5960 = load i8*, i8** %14, align 8
   %box = call noalias nonnull dereferenceable(32) {}* @ijl_gc_pool_alloc(i8* %ptls_load5960, i32 1184, i32 32) #10
   %15 = bitcast {}* %box to i64*
   %16 = getelementptr inbounds i64, i64* %15, i64 -1
   store atomic i64 139659734328144, i64* %16 unordered, align 8
   %17 = bitcast {}* %box to i8*
   store i64 %arraysize, i64* %15, align 8
   %newstruct.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8* %17, i64 8
   %newstruct.sroa.2.0..sroa_cast = bitcast i8* %newstruct.sroa.2.0..sroa_idx to i64*
   store i64 %arraysize2, i64* %newstruct.sroa.2.0..sroa_cast, align 8
   %18 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 2
   store {}* %box, {}** %18, align 16
   store {}* inttoptr (i64 139659873951040 to {}*), {}** %.sub, align 8
   %19 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 1
   store {}* inttoptr (i64 139659960362176 to {}*), {}** %19, align 8
   %20 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 2
   store {}* inttoptr (i64 139659873950976 to {}*), {}** %20, align 8
   %21 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 3
   store {}* %box, {}** %21, align 8
   %22 = call nonnull {}* @j1_print_to_string_627({}* inttoptr (i64 139659736712096 to {}*), {}** nonnull %.sub, i32 4)
   store {}* %22, {}** %18, align 16
; └
  store {}* %22, {}** %.sub, align 8
  %23 = call nonnull {}* @ijl_apply_generic({}* inttoptr (i64 139659805161680 to {}*), {}** nonnull %.sub, i32 1)
  call void @ijl_throw({}* %23)
  unreachable

L12:                                              ; preds = %top
;  @ abstractarray.jl:2133 within `hvcat_fill!`
; ┌ @ range.jl:897 within `iterate`
; │┌ @ range.jl:672 within `isempty`
; ││┌ @ operators.jl:376 within `>`
; │││┌ @ int.jl:83 within `<`
      %.not43.not = icmp eq i64 %arraysize, 0
; └└└└
  br i1 %.not43.not, label %L88, label %L28.preheader

L28.preheader:                                    ; preds = %L12
  %.not45.not = icmp eq i64 %arraysize2, 0
  %24 = bitcast { i64, double, i64, double }* %1 to i8*
  %25 = bitcast {}* %0 to double**
;  @ abstractarray.jl:2134 within `hvcat_fill!`
  br i1 %.not45.not, label %L88, label %L28

L28:                                              ; preds = %L76.loopexit, %L28.preheader
  %value_phi7 = phi i64 [ %50, %L76.loopexit ], [ 1, %L28.preheader ]
  %value_phi9 = phi i64 [ %48, %L76.loopexit ], [ 1, %L28.preheader ]
  %26 = add nsw i64 %value_phi7, -1
  %arrayptr3450 = load double*, double** %25, align 8
;  @ abstractarray.jl:2135 within `hvcat_fill!`
  br label %L47

L47:                                              ; preds = %L64, %L28
  %value_phi14 = phi i64 [ %48, %L64 ], [ %value_phi9, %L28 ]
  %value_phi15 = phi i64 [ %49, %L64 ], [ 1, %L28 ]
; ┌ @ tuple.jl:31 within `getindex`
   %27 = add i64 %value_phi14, -1
   %ptls_field5461 = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
   %28 = bitcast {}*** %ptls_field5461 to i8**
   %ptls_load556263 = load i8*, i8** %28, align 8
   %box18 = call noalias nonnull dereferenceable(48) {}* @ijl_gc_pool_alloc(i8* %ptls_load556263, i32 1232, i32 48) #10
   %29 = bitcast {}* %box18 to i64*
   %30 = getelementptr inbounds i64, i64* %29, i64 -1
   store atomic i64 139654969175504, i64* %30 unordered, align 8
   %31 = bitcast {}* %box18 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(32) %31, i8* noundef nonnull align 8 dereferenceable(32) %24, i64 32, i1 false)
   %32 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 2
   store {}* %box18, {}** %32, align 16
   %33 = call {}* @ijl_get_nth_field_checked({}* nonnull %box18, i64 %27)
; └
  %34 = bitcast {}* %33 to i64*
  %35 = getelementptr inbounds i64, i64* %34, i64 -1
  %36 = load atomic i64, i64* %35 unordered, align 8
  %37 = and i64 %36, -16
  switch i64 %37, label %L62 [
    i64 139659806303152, label %L53
    i64 256, label %L58
  ]

L53:                                              ; preds = %L47
; ┌ @ array.jl:1022 within `setindex!`
   %38 = add nsw i64 %value_phi15, -1
   %39 = mul i64 %arraysize, %38
   %40 = add i64 %26, %39
   %memcpy_refined_dst = getelementptr inbounds double, double* %arrayptr3450, i64 %40
   %41 = bitcast {}* %33 to double*
   %42 = load double, double* %41, align 8
   store double %42, double* %memcpy_refined_dst, align 8
; └
  br label %L64

L58:                                              ; preds = %L47
; ┌ @ array.jl:1022 within `setindex!`
; │┌ @ number.jl:7 within `convert`
; ││┌ @ float.jl:159 within `Float64`
     %unbox = load i64, i64* %34, align 8
     %43 = sitofp i64 %unbox to double
; │└└
   %44 = add nsw i64 %value_phi15, -1
   %45 = mul i64 %arraysize, %44
   %46 = add i64 %26, %45
   %47 = getelementptr inbounds double, double* %arrayptr3450, i64 %46
   store double %43, double* %47, align 8
; └
  br label %L64

L62:                                              ; preds = %L47
  call void @ijl_throw({}* inttoptr (i64 139659795150016 to {}*))
  unreachable

L64:                                              ; preds = %L58, %L53
;  @ abstractarray.jl:2136 within `hvcat_fill!`
; ┌ @ int.jl:87 within `+`
   %48 = add i64 %value_phi14, 1
; └
;  @ abstractarray.jl:2137 within `hvcat_fill!`
; ┌ @ range.jl:901 within `iterate`
; │┌ @ promotion.jl:521 within `==`
    %.not48 = icmp eq i64 %value_phi15, %arraysize2
; │└
   %49 = add nuw nsw i64 %value_phi15, 1
; └
  br i1 %.not48, label %L76.loopexit, label %L47

L76.loopexit:                                     ; preds = %L64
;  @ abstractarray.jl:2138 within `hvcat_fill!`
; ┌ @ range.jl:901 within `iterate`
; │┌ @ promotion.jl:521 within `==`
    %.not49 = icmp eq i64 %value_phi7, %arraysize
; │└
   %50 = add nuw nsw i64 %value_phi7, 1
; └
  br i1 %.not49, label %L88, label %L28

L88:                                              ; preds = %L76.loopexit, %L28.preheader, %L12
  %51 = load {}*, {}** %6, align 8
  %52 = bitcast {}*** %tls_pgcstack to {}**
  store {}* %51, {}** %52, align 8
;  @ abstractarray.jl:2139 within `hvcat_fill!`
  ret {}* %0
}

wsmoses avatar Nov 04 '23 22:11 wsmoses

Could a test be added for this?

KristofferC avatar Feb 13 '24 12:02 KristofferC