julia
julia copied to clipboard
Avoid using the -O0 code emission because it has several bugs
This only affects debug builds of julia but i've found multiple issues with the backends with O0. The latest being a crash while compiling the following module. To reproduce do llc -O0
with LLVM 16, given that the "fast" backend implementations have repeating issues I propose to just never use them.
; ModuleID = 'cconvert'
source_filename = "cconvert"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-apple-darwin23.4.0"
@"+Core.Tuple#18792" = external global ptr
@"jl_global#18794" = external global ptr
@__stack_chk_guard = external global ptr
; Function Attrs: sspstrong
define swiftcc nonnull ptr @julia_cconvert_18789(ptr nonnull swiftself %pgcstack, ptr noundef nonnull %"T::<unknown type>", <8 x i64> %"x::Tuple") #0 !dbg !8 {
top:
%StackGuardSlot = alloca ptr, align 8
%0 = call ptr @llvm.stackguard()
call void @llvm.stackprotector(ptr %0, ptr %StackGuardSlot)
%jlcallframe = alloca ptr, i32 2, align 8
%gcframe = alloca ptr, i32 3, align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe, i8 0, i64 24, i1 true)
%frame.nroots = getelementptr inbounds ptr, ptr %gcframe, i32 0
store i64 4, ptr %frame.nroots, align 8, !tbaa !27
%task.gcstack = load ptr, ptr %pgcstack, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe, i32 1
store ptr %task.gcstack, ptr %frame.prev, align 8, !tbaa !27
store ptr %gcframe, ptr %pgcstack, align 8
call void @llvm.dbg.declare(metadata ptr %"T::<unknown type>", metadata !25, metadata !DIExpression()), !dbg !31
call void @llvm.dbg.value(metadata <8 x i64> %"x::Tuple", metadata !26, metadata !DIExpression()), !dbg !31
%current_task1 = getelementptr inbounds ptr, ptr %pgcstack, i64 -14
%ptls_field = getelementptr inbounds ptr, ptr %current_task1, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8, !tbaa !27
%1 = getelementptr inbounds ptr, ptr %ptls_load, i64 2
%safepoint = load ptr, ptr %1, align 8, !tbaa !32
fence syncscope("singlethread") seq_cst
%2 = load volatile i64, ptr %safepoint, align 8, !dbg !31
fence syncscope("singlethread") seq_cst
%"+Core.Tuple#18792" = load ptr, ptr @"+Core.Tuple#18792", align 8, !dbg !31, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17, !dereferenceable !42, !align !43
%Tuple = ptrtoint ptr %"+Core.Tuple#18792" to i64, !dbg !31
%3 = inttoptr i64 %Tuple to ptr, !dbg !31
%current_task2 = getelementptr inbounds ptr, ptr %pgcstack, i64 -14, !dbg !31
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe, i32 2
store ptr %3, ptr %gc_slot_addr_0, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %3, i64 16) ], !dbg !31
%ptls_field18 = getelementptr inbounds ptr, ptr %current_task2, i64 16, !dbg !31
%ptls_load19 = load ptr, ptr %ptls_field18, align 8, !dbg !31, !tbaa !27
%4 = ptrtoint ptr %3 to i64, !dbg !31
%"box::Tuple" = call noalias nonnull align 8 dereferenceable(64) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load19, i32 944, i32 80, i64 %4) #6, !dbg !31
%"box::Tuple.tag_addr" = getelementptr inbounds ptr, ptr %"box::Tuple", i64 -1, !dbg !31
store atomic ptr %3, ptr %"box::Tuple.tag_addr" unordered, align 8, !dbg !31, !tbaa !44
store <8 x i64> %"x::Tuple", ptr %"box::Tuple", align 8, !dbg !31, !tbaa !47, !alias.scope !50, !noalias !51
%gc_slot_addr_011 = getelementptr inbounds ptr, ptr %gcframe, i32 2
store ptr %"box::Tuple", ptr %gc_slot_addr_011, align 8
%5 = getelementptr inbounds ptr, ptr %jlcallframe, i32 0, !dbg !31
store ptr %"box::Tuple", ptr %5, align 8, !dbg !31
%6 = getelementptr inbounds ptr, ptr %jlcallframe, i32 1, !dbg !31
store ptr %"T::<unknown type>", ptr %6, align 8, !dbg !31
%jl_f_isa_ret = call nonnull ptr @jl_f_isa(ptr null, ptr %jlcallframe, i32 2), !dbg !31
%jl_f_isa_ret.unbox = load i8, ptr %jl_f_isa_ret, align 1, !dbg !31, !tbaa !47, !range !52, !alias.scope !50, !noalias !51
%7 = trunc i8 %jl_f_isa_ret.unbox to i1, !dbg !31
%8 = xor i1 %7, true, !dbg !31
br i1 %8, label %L4, label %L3, !dbg !31
common.ret: ; preds = %L4, %L3
%common.ret.op = phi ptr [ %"box::Tuple6", %L3 ], [ %16, %L4 ]
%9 = getelementptr inbounds ptr, ptr %gcframe, i32 1
%frame.prev28 = load ptr, ptr %9, align 8, !tbaa !27
store ptr %frame.prev28, ptr %pgcstack, align 8, !tbaa !27
ret ptr %common.ret.op, !dbg !31
L3: ; preds = %top
%"+Core.Tuple#187923" = load ptr, ptr @"+Core.Tuple#18792", align 8, !dbg !31, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17, !dereferenceable !42, !align !43
%Tuple4 = ptrtoint ptr %"+Core.Tuple#187923" to i64, !dbg !31
%10 = inttoptr i64 %Tuple4 to ptr, !dbg !31
%current_task5 = getelementptr inbounds ptr, ptr %pgcstack, i64 -14, !dbg !31
%gc_slot_addr_012 = getelementptr inbounds ptr, ptr %gcframe, i32 2
store ptr %10, ptr %gc_slot_addr_012, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %10, i64 16) ], !dbg !31
%ptls_field22 = getelementptr inbounds ptr, ptr %current_task5, i64 16, !dbg !31
%ptls_load23 = load ptr, ptr %ptls_field22, align 8, !dbg !31, !tbaa !27
%11 = ptrtoint ptr %10 to i64, !dbg !31
%"box::Tuple6" = call noalias nonnull align 8 dereferenceable(64) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load23, i32 944, i32 80, i64 %11) #6, !dbg !31
%"box::Tuple6.tag_addr" = getelementptr inbounds ptr, ptr %"box::Tuple6", i64 -1, !dbg !31
store atomic ptr %10, ptr %"box::Tuple6.tag_addr" unordered, align 8, !dbg !31, !tbaa !44
store <8 x i64> %"x::Tuple", ptr %"box::Tuple6", align 8, !dbg !31, !tbaa !47, !alias.scope !50, !noalias !51
br label %common.ret
L4: ; preds = %top
%"jl_global#18794" = load ptr, ptr @"jl_global#18794", align 8, !dbg !31, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17
%"+Core.Tuple#187927" = load ptr, ptr @"+Core.Tuple#18792", align 8, !dbg !31, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17, !dereferenceable !42, !align !43
%Tuple8 = ptrtoint ptr %"+Core.Tuple#187927" to i64, !dbg !31
%12 = inttoptr i64 %Tuple8 to ptr, !dbg !31
%current_task9 = getelementptr inbounds ptr, ptr %pgcstack, i64 -14, !dbg !31
%gc_slot_addr_013 = getelementptr inbounds ptr, ptr %gcframe, i32 2
store ptr %12, ptr %gc_slot_addr_013, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %12, i64 16) ], !dbg !31
%ptls_field25 = getelementptr inbounds ptr, ptr %current_task9, i64 16, !dbg !31
%ptls_load26 = load ptr, ptr %ptls_field25, align 8, !dbg !31, !tbaa !27
%13 = ptrtoint ptr %12 to i64, !dbg !31
%"box::Tuple10" = call noalias nonnull align 8 dereferenceable(64) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load26, i32 944, i32 80, i64 %13) #6, !dbg !31
%"box::Tuple10.tag_addr" = getelementptr inbounds ptr, ptr %"box::Tuple10", i64 -1, !dbg !31
store atomic ptr %12, ptr %"box::Tuple10.tag_addr" unordered, align 8, !dbg !31, !tbaa !44
store <8 x i64> %"x::Tuple", ptr %"box::Tuple10", align 8, !dbg !31, !tbaa !47, !alias.scope !50, !noalias !51
%gc_slot_addr_014 = getelementptr inbounds ptr, ptr %gcframe, i32 2
store ptr %"box::Tuple10", ptr %gc_slot_addr_014, align 8
%14 = getelementptr inbounds ptr, ptr %jlcallframe, i32 0, !dbg !31
store ptr %"T::<unknown type>", ptr %14, align 8, !dbg !31
%15 = getelementptr inbounds ptr, ptr %jlcallframe, i32 1, !dbg !31
store ptr %"box::Tuple10", ptr %15, align 8, !dbg !31
%16 = call nonnull ptr @ijl_apply_generic(ptr %"jl_global#18794", ptr %jlcallframe, i32 2), !dbg !31
br label %common.ret
}
; Function Attrs: noinline optnone
define nonnull ptr @jfptr_cconvert_18790(ptr %"function::Core.Function", ptr noalias nocapture noundef readonly %"args::Any[]", i32 %"nargs::UInt32") #1 {
top:
%pgcstack = call ptr inttoptr (i64 6936518052 to ptr)(i64 261) #9
%0 = getelementptr inbounds ptr, ptr %"args::Any[]", i32 0
%1 = load ptr, ptr %0, align 8, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17
%2 = getelementptr inbounds ptr, ptr %"args::Any[]", i32 1
%3 = load ptr, ptr %2, align 8, !tbaa !32, !alias.scope !34, !noalias !37, !nonnull !17, !dereferenceable !53, !align !43
%4 = load <8 x i64>, ptr %3, align 16
%5 = call swiftcc nonnull ptr @julia_cconvert_18789(ptr nonnull swiftself %pgcstack, ptr %1, <8 x i64> %4)
ret ptr %5
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.declare(metadata %0, metadata %1, metadata %2) #2
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.value(metadata %0, metadata %1, metadata %2) #2
; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
declare void @julia.safepoint(ptr %0) #3
declare nonnull ptr @jl_f_isa(ptr %0, ptr noalias nocapture noundef readonly %1, i32 %2)
declare nonnull ptr @julia.call(ptr %0, ptr %1, ...)
; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr @julia.gc_alloc_obj(ptr %0, i64 %1, ptr %2) #4
declare nonnull ptr @ijl_apply_generic(ptr %0, ptr noalias nocapture noundef readonly %1, i32 %2)
declare noalias nonnull ptr @julia.new_gc_frame(i32 %0)
declare void @julia.push_gc_frame(ptr %0, i32 %1)
declare ptr @julia.get_gc_frame_slot(ptr %0, i32 %1)
declare void @julia.pop_gc_frame(ptr %0)
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
declare void @llvm.assume(i1 noundef %0) #5
; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr @julia.gc_alloc_bytes(ptr %0, i64 %1, i64 %2) #4
; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
declare void @ijl_gc_queue_root(ptr %0) #3
; Function Attrs: nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr @ijl_gc_pool_alloc_instrumented(ptr %0, i32 %1, i32 %2, i64 %3) #6
; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr @ijl_gc_big_alloc_instrumented(ptr %0, i64 %1, i64 %2) #4
; Function Attrs: nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr @ijl_gc_alloc_typed(ptr %0, i64 %1, i64 %2) #4
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #7
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare ptr @llvm.stackguard() #8
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.stackprotector(ptr %0, ptr %1) #8
attributes #0 = { sspstrong "julia.fsig"="Base.cconvert(Type, NTuple{8, VecElement{UInt64}})" "probe-stack"="inline-asm" }
attributes #1 = { noinline optnone "probe-stack"="inline-asm" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
attributes #4 = { nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
attributes #6 = { nounwind willreturn allockind("alloc") allocsize(2) memory(argmem: read, inaccessiblemem: readwrite) }
attributes #7 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #8 = { nocallback nofree nosync nounwind willreturn }
attributes #9 = { nounwind memory(none) }
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
!llvm.dbg.cu = !{!6}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"stack-protector-guard", !"global"}
!3 = !{i32 2, !"julia.debug_level", i32 2}
!4 = !{i32 1, !"julia.__jit_debug_tsm_addr", i64 6171590768}
!5 = !{i32 2, !"julia.optlevel", i32 0}
!6 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !7, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: GNU)
!7 = !DIFile(filename: "julia", directory: ".")
!8 = distinct !DISubprogram(name: "cconvert", linkageName: "julia_cconvert_18789", scope: null, file: !9, line: 662, type: !10, scopeLine: 662, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !23)
!9 = !DIFile(filename: "essentials.jl", directory: ".")
!10 = !DISubroutineType(types: !11)
!11 = !{!12, !16, !12, !18}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64, align: 64)
!13 = !DICompositeType(tag: DW_TAG_structure_type, name: "jl_value_t", file: !14, line: 71, align: 64, elements: !15)
!14 = !DIFile(filename: "julia.h", directory: "")
!15 = !{!12}
!16 = !DICompositeType(tag: DW_TAG_structure_type, name: "#cconvert", align: 8, elements: !17, runtimeLang: DW_LANG_Julia, identifier: "4683186752")
!17 = !{}
!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "Tuple", size: 512, align: 512, elements: !19, runtimeLang: DW_LANG_Julia, identifier: "4699695088")
!19 = !{!20, !20, !20, !20, !20, !20, !20, !20}
!20 = !DICompositeType(tag: DW_TAG_structure_type, name: "VecElement", size: 64, align: 64, elements: !21, runtimeLang: DW_LANG_Julia, identifier: "4699693056")
!21 = !{!22}
!22 = !DIBasicType(name: "UInt64", size: 64, encoding: DW_ATE_unsigned)
!23 = !{!24, !25, !26}
!24 = !DILocalVariable(name: "#self#", arg: 1, scope: !8, file: !9, line: 662, type: !16)
!25 = !DILocalVariable(name: "T", arg: 2, scope: !8, file: !9, line: 662, type: !12)
!26 = !DILocalVariable(name: "x", arg: 3, scope: !8, file: !9, line: 662, type: !18)
!27 = !{!28, !28, i64 0}
!28 = !{!"jtbaa_gcframe", !29, i64 0}
!29 = !{!"jtbaa", !30, i64 0}
!30 = !{!"jtbaa"}
!31 = !DILocation(line: 662, scope: !8)
!32 = !{!33, !33, i64 0}
!33 = !{!"jtbaa_const", !29, i64 0}
!34 = !{!35}
!35 = !{!"jnoalias_const", !36}
!36 = !{!"jnoalias"}
!37 = !{!38, !39, !40, !41}
!38 = !{!"jnoalias_gcframe", !36}
!39 = !{!"jnoalias_stack", !36}
!40 = !{!"jnoalias_data", !36}
!41 = !{!"jnoalias_typemd", !36}
!42 = !{i64 56}
!43 = !{i64 16}
!44 = !{!45, !45, i64 0}
!45 = !{!"jtbaa_tag", !46, i64 0}
!46 = !{!"jtbaa_data", !29, i64 0}
!47 = !{!48, !48, i64 0}
!48 = !{!"jtbaa_immut", !49, i64 0}
!49 = !{!"jtbaa_value", !46, i64 0}
!50 = !{!40}
!51 = !{!38, !39, !41, !35}
!52 = !{i8 0, i8 2}
!53 = !{i64 64}
I've been running a large number of tests with different aggressiveness settings for the different pipelines. I do not have a complete set of results yet, but here are a few observations related to this PR:
- We've seen no correctness issues with
O0
and we've been running some of our test suite at this optimization level for at least a couple of years so far. Can you provide further detail on what the problem is and why you want to eliminate this? Losing the ability to run withO0
is not desirable for us -- it will cost us a lot of CI time. - We find that
Default
aggressiveness for theO1
pipeline appears to have almost no effect on compile time but shows a slight regression on runtime wrtO2
(which usesDefault
). In our experiments,Less
(instead ofNone
) onO1
seems to offer the right compromise for aggressiveness (i.e. @Sacha0's change in this PR) -- it notably reduces compile time and slightly increases runtime.
Happy to share some of our testing results on Slack, if you are interested.
That module triggers an llvm assertion for example, and we disable fast isel on two targets already and have seen issues before. Does this really make that much of a difference in some tests? Also I imagine x86 has more coverage than aarch64 which was the case above.
We don't have exhaustive results on this test yet, but early indications are that O1
with None
aggressiveness is only slightly slower than O0
, so if there are clear problems with O0
, then this might be a workable alternative.
Also, we find that -O1
with Less
aggressiveness is a good sweet spot -- we see ~20% faster "cold" runs and only ~10% slower "warm" runs. So we'd like to merge https://github.com/JuliaLang/julia/pull/37893. Would you take a look at that please @gbaraldi?
Maybe we can reopen sacha's PR (or push to it) to open an updated standalone PR for just the O1 less change, so that can be merged separately from the rest of the changes here?
Oh, wait, actually Sacha's PR is up to date. Sorry for my confusion! Yeah, let's merge that on its own! What do you think @gbaraldi?
Oh, wait, actually https://github.com/JuliaLang/julia/pull/37893 is up to date.
It is?! After almost four years?! Color me shocked 😂
Closed by https://github.com/JuliaLang/julia/pull/37893