julia
julia copied to clipboard
Type stabilize hvcat_fill!
Presently hvcat_fill is type unstable if the input tuple does not contain values of the same type.
This changes the iteration index to be of the tuple itself (permitting unrolling) and therefore type stabilization.
It would be really nice to have this backported to 1.10 before release (and other Julia's too, if possible).
Before this change it would behave like this:
julia> @code_llvm Base.hvcat_fill!(Matrix{Float64}(undef, 2, 2), (((1, 2.0, 3, 4.0)))
)
; @ abstractarray.jl:2126 within `hvcat_fill!`
define nonnull {}* @"julia_hvcat_fill!_625"({}* noundef nonnull align 16 dereferenceable(40) %0, { i64, double, i64, double }* nocapture noundef nonnull readonly align 8 dereferenceable(32) %1) #0 {
top:
%2 = alloca [4 x {}*], align 8
%gcframe57 = alloca [3 x {}*], align 16
%gcframe57.sub = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 0
%3 = bitcast [3 x {}*]* %gcframe57 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 true)
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #13
%tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
%4 = bitcast i8* %tls_ppgcstack to {}****
%tls_pgcstack = load {}***, {}**** %4, align 8
; @ abstractarray.jl:2127 within `hvcat_fill!`
; ┌ @ array.jl:190 within `size`
%5 = bitcast [3 x {}*]* %gcframe57 to i64*
store i64 4, i64* %5, align 16
%6 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 1
%7 = bitcast {}** %6 to {}***
%8 = load {}**, {}*** %tls_pgcstack, align 8
store {}** %8, {}*** %7, align 8
%9 = bitcast {}*** %tls_pgcstack to {}***
store {}** %gcframe57.sub, {}*** %9, align 8
%10 = bitcast {}* %0 to {}**
%arraysize_ptr = getelementptr inbounds {}*, {}** %10, i64 3
%11 = bitcast {}** %arraysize_ptr to i64*
%arraysize = load i64, i64* %11, align 8
%arraysize_ptr1 = getelementptr inbounds {}*, {}** %10, i64 4
%12 = bitcast {}** %arraysize_ptr1 to i64*
%arraysize2 = load i64, i64* %12, align 8
; └
; @ abstractarray.jl:2129 within `hvcat_fill!`
; ┌ @ int.jl:88 within `*`
%13 = mul i64 %arraysize2, %arraysize
; └
; ┌ @ operators.jl:276 within `!=`
; │┌ @ promotion.jl:521 within `==`
%.not = icmp eq i64 %13, 4
; └└
br i1 %.not, label %L12, label %L7
L7: ; preds = %top
%.sub = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 0
; @ abstractarray.jl:2130 within `hvcat_fill!`
; ┌ @ strings/io.jl:189 within `string`
%ptls_field58 = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
%14 = bitcast {}*** %ptls_field58 to i8**
%ptls_load5960 = load i8*, i8** %14, align 8
%box = call noalias nonnull dereferenceable(32) {}* @ijl_gc_pool_alloc(i8* %ptls_load5960, i32 1184, i32 32) #10
%15 = bitcast {}* %box to i64*
%16 = getelementptr inbounds i64, i64* %15, i64 -1
store atomic i64 139659734328144, i64* %16 unordered, align 8
%17 = bitcast {}* %box to i8*
store i64 %arraysize, i64* %15, align 8
%newstruct.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8* %17, i64 8
%newstruct.sroa.2.0..sroa_cast = bitcast i8* %newstruct.sroa.2.0..sroa_idx to i64*
store i64 %arraysize2, i64* %newstruct.sroa.2.0..sroa_cast, align 8
%18 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 2
store {}* %box, {}** %18, align 16
store {}* inttoptr (i64 139659873951040 to {}*), {}** %.sub, align 8
%19 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 1
store {}* inttoptr (i64 139659960362176 to {}*), {}** %19, align 8
%20 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 2
store {}* inttoptr (i64 139659873950976 to {}*), {}** %20, align 8
%21 = getelementptr inbounds [4 x {}*], [4 x {}*]* %2, i64 0, i64 3
store {}* %box, {}** %21, align 8
%22 = call nonnull {}* @j1_print_to_string_627({}* inttoptr (i64 139659736712096 to {}*), {}** nonnull %.sub, i32 4)
store {}* %22, {}** %18, align 16
; └
store {}* %22, {}** %.sub, align 8
%23 = call nonnull {}* @ijl_apply_generic({}* inttoptr (i64 139659805161680 to {}*), {}** nonnull %.sub, i32 1)
call void @ijl_throw({}* %23)
unreachable
L12: ; preds = %top
; @ abstractarray.jl:2133 within `hvcat_fill!`
; ┌ @ range.jl:897 within `iterate`
; │┌ @ range.jl:672 within `isempty`
; ││┌ @ operators.jl:376 within `>`
; │││┌ @ int.jl:83 within `<`
%.not43.not = icmp eq i64 %arraysize, 0
; └└└└
br i1 %.not43.not, label %L88, label %L28.preheader
L28.preheader: ; preds = %L12
%.not45.not = icmp eq i64 %arraysize2, 0
%24 = bitcast { i64, double, i64, double }* %1 to i8*
%25 = bitcast {}* %0 to double**
; @ abstractarray.jl:2134 within `hvcat_fill!`
br i1 %.not45.not, label %L88, label %L28
L28: ; preds = %L76.loopexit, %L28.preheader
%value_phi7 = phi i64 [ %50, %L76.loopexit ], [ 1, %L28.preheader ]
%value_phi9 = phi i64 [ %48, %L76.loopexit ], [ 1, %L28.preheader ]
%26 = add nsw i64 %value_phi7, -1
%arrayptr3450 = load double*, double** %25, align 8
; @ abstractarray.jl:2135 within `hvcat_fill!`
br label %L47
L47: ; preds = %L64, %L28
%value_phi14 = phi i64 [ %48, %L64 ], [ %value_phi9, %L28 ]
%value_phi15 = phi i64 [ %49, %L64 ], [ 1, %L28 ]
; ┌ @ tuple.jl:31 within `getindex`
%27 = add i64 %value_phi14, -1
%ptls_field5461 = getelementptr inbounds {}**, {}*** %tls_pgcstack, i64 2
%28 = bitcast {}*** %ptls_field5461 to i8**
%ptls_load556263 = load i8*, i8** %28, align 8
%box18 = call noalias nonnull dereferenceable(48) {}* @ijl_gc_pool_alloc(i8* %ptls_load556263, i32 1232, i32 48) #10
%29 = bitcast {}* %box18 to i64*
%30 = getelementptr inbounds i64, i64* %29, i64 -1
store atomic i64 139654969175504, i64* %30 unordered, align 8
%31 = bitcast {}* %box18 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(32) %31, i8* noundef nonnull align 8 dereferenceable(32) %24, i64 32, i1 false)
%32 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe57, i64 0, i64 2
store {}* %box18, {}** %32, align 16
%33 = call {}* @ijl_get_nth_field_checked({}* nonnull %box18, i64 %27)
; └
%34 = bitcast {}* %33 to i64*
%35 = getelementptr inbounds i64, i64* %34, i64 -1
%36 = load atomic i64, i64* %35 unordered, align 8
%37 = and i64 %36, -16
switch i64 %37, label %L62 [
i64 139659806303152, label %L53
i64 256, label %L58
]
L53: ; preds = %L47
; ┌ @ array.jl:1022 within `setindex!`
%38 = add nsw i64 %value_phi15, -1
%39 = mul i64 %arraysize, %38
%40 = add i64 %26, %39
%memcpy_refined_dst = getelementptr inbounds double, double* %arrayptr3450, i64 %40
%41 = bitcast {}* %33 to double*
%42 = load double, double* %41, align 8
store double %42, double* %memcpy_refined_dst, align 8
; └
br label %L64
L58: ; preds = %L47
; ┌ @ array.jl:1022 within `setindex!`
; │┌ @ number.jl:7 within `convert`
; ││┌ @ float.jl:159 within `Float64`
%unbox = load i64, i64* %34, align 8
%43 = sitofp i64 %unbox to double
; │└└
%44 = add nsw i64 %value_phi15, -1
%45 = mul i64 %arraysize, %44
%46 = add i64 %26, %45
%47 = getelementptr inbounds double, double* %arrayptr3450, i64 %46
store double %43, double* %47, align 8
; └
br label %L64
L62: ; preds = %L47
call void @ijl_throw({}* inttoptr (i64 139659795150016 to {}*))
unreachable
L64: ; preds = %L58, %L53
; @ abstractarray.jl:2136 within `hvcat_fill!`
; ┌ @ int.jl:87 within `+`
%48 = add i64 %value_phi14, 1
; └
; @ abstractarray.jl:2137 within `hvcat_fill!`
; ┌ @ range.jl:901 within `iterate`
; │┌ @ promotion.jl:521 within `==`
%.not48 = icmp eq i64 %value_phi15, %arraysize2
; │└
%49 = add nuw nsw i64 %value_phi15, 1
; └
br i1 %.not48, label %L76.loopexit, label %L47
L76.loopexit: ; preds = %L64
; @ abstractarray.jl:2138 within `hvcat_fill!`
; ┌ @ range.jl:901 within `iterate`
; │┌ @ promotion.jl:521 within `==`
%.not49 = icmp eq i64 %value_phi7, %arraysize
; │└
%50 = add nuw nsw i64 %value_phi7, 1
; └
br i1 %.not49, label %L88, label %L28
L88: ; preds = %L76.loopexit, %L28.preheader, %L12
%51 = load {}*, {}** %6, align 8
%52 = bitcast {}*** %tls_pgcstack to {}**
store {}* %51, {}** %52, align 8
; @ abstractarray.jl:2139 within `hvcat_fill!`
ret {}* %0
}
Could a test be added for this?