julia
julia copied to clipboard
inlining: relax finalizer inlining control-flow restriction
Eager finalizer inlining (#45272) currently has a restriction that
requires all the def/uses to be in a same basic block.
This commit relaxes that restriction a bit by allowing def/uses to
involve control flow when all of them are dominated by a finalizer
call to be inlined, since in that case it is safe to insert the body of
finalizer at the end of all the def/uses, e.g.
const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
mutable struct DoAllocWithFieldInter
x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
finalizer(obj) do this
add_finalization_count!(this.x)
end
end
function cfg_finalization3(io)
for i = -999:1000
o = DoAllocWithFieldInter(i)
register_finalizer!(o)
if i == 1000
safeprint(io, o.x, '\n')
elseif i > 0
safeprint(io, o.x)
end
end
end
let src = code_typed1(cfg_finalization3, (IO,))
@test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
init_finalization_count!()
cfg_finalization3(IOBuffer())
@test get_finalization_count() == 1000
end
There is a room for further improvement though -- if we have a way to
compute the post-domination, we can also inline a finalizer call
in a case when a finalizer block post-dominateds all the def/uses e.g.
function cfg_finalization5(io)
for i = -999:1000
o = DoAllocWithFieldInter(i)
if i == 1000
safeprint(io, o.x, '\n')
elseif i > 0
safeprint(io, o.x)
end
register_finalizer!(o)
end
end
let src = code_typed1(cfg_finalization5, (IO,))
# TODO we can fix this case by checking a case when a finalizer block is post-dominated by all the def/uses
@test_broken count(isinvoke(:nothrow_side_effect), src.code) == 1
end
With this restriction lifted, it's no longer sufficient to just inline the finalizer after the numerically last SSA value that uses it. Something needs to ensure the finalizer happens after the last use on any path to the exit:
julia> @noinline nothrow_side_effect(x) =
Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid
nothrow_side_effect (generic function with 1 method)
julia> mutable struct DoAllocWithField
x
function DoAllocWithField(x)
finalizer(new(x)) do this
nothrow_side_effect(this.x)
end
end
end
julia> function foo(b::Bool)
s = DoAllocWithField(1)
if b
nothrow_side_effect(s.x)
return 1
else
return 0
end
end
foo (generic function with 1 method)
julia> code_typed(foo, Tuple{Bool})
1-element Vector{Any}:
CodeInfo(
1 ─ %1 = %new(Main.:(var"#3#4"))::var"#3#4"
│ %2 = %new(Main.DoAllocWithField, 1)::DoAllocWithField
└── goto #3 if not b
2 ─ %4 = Base.getfield(%2, :x)::Any
│ invoke %1(%2::DoAllocWithField)::Nothing
│ Main.nothrow_side_effect(%4)::Any
└── return 1
3 ─ return 0
) => Int64
julia> foo(true)
1
1
1
julia> foo(false)
0
Just FYI, multiple windows buildbots hardlocked on this PR, and I had to manually reboot them. I'm not sure if that's a coincidence or not, but you can see the builds here:
- https://build.julialang.org/#/builders/29/builds/10109
- https://build.julialang.org/#/builders/72/builds/10250
- https://build.julialang.org/#/builders/72/builds/10236
So perhaps something on this branch causes things to jam up in a bad way? Of course, it is a failing of the current windows buildbots that they do not more forcefully kill the windows processes and get stuck like this, but that's something we are working to fix in the Buildkite era.
I've implemented the post-dominator analysis and updated this. I think this should be reasonably complete now. @aviatesk take a look, please.
Looks great, thanks sooo much for implementing the post-domination analysis, that is very exciting itself. I think there seems to be a problem in finalizer insertion point though, MRE would be:
const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,)))
mutable struct DoAllocWithFieldInter
x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
finalizer(obj) do this
add_finalization_count!(this.x)
end
end
function cfg_finalization6(io)
for i = -999:1000
o = DoAllocWithFieldInter(0)
register_finalizer!(o)
if i == 1000
o.x = i # with `setfield!`
elseif i > 0
safeprint(io, o.x, '\n')
end
# <= shouldn't the finalizer be inlined here?
end
end
let src = code_typed1(cfg_finalization6, (IO,))
@test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
init_finalization_count!()
cfg_finalization6(IOBuffer())
@test get_finalization_count() == 1000 # this fails
end
Currently the finalizer is inlined after the allocation site, so can't observe the field value changed by setfield!:
julia> src = code_typed1(cfg_finalization6, (IO,))
CodeInfo(
1 ── goto #11 if not true
2 ┄─ %2 = φ (#1 => -999, #10 => %20)::Int64
│ %3 = φ (#1 => -999, #10 => %21)::Int64
│ %4 = %new(Main.DoAllocWithFieldInter, 0)::DoAllocWithFieldInter
│ %5 = Base.getfield(%4, :x)::Int64
│ invoke Main.add_finalization_count!(%5::Int64)::Int64
│ %7 = (%2 === 1000)::Bool
└─── goto #4 if not %7
3 ── Base.setfield!(%4, :x, %2)::Int64
└─── goto #6
4 ── %11 = Base.slt_int(0, %2)::Bool
└─── goto #6 if not %11
5 ── %13 = Base.getfield(%4, :x)::Int64
└─── invoke Main.safeprint(io::IO, %13::Any, '\n'::Vararg{Any})::Any
6 ┄─ %15 = (%3 === 1000)::Bool
└─── goto #8 if not %15
7 ── goto #9
8 ── %18 = Base.add_int(%3, 1)::Int64
└─── goto #9
9 ┄─ %20 = φ (#8 => %18)::Int64
│ %21 = φ (#8 => %18)::Int64
│ %22 = φ (#7 => true, #8 => false)::Bool
│ %23 = Base.not_int(%22)::Bool
└─── goto #11 if not %23
10 ─ goto #2
11 ┄ return nothing
)
I think there seems to be a problem in finalizer insertion point though, MRE would be:
Should be fixed by the following, if you want to apply that and add the test
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 00577526ac..1e9d616a96 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1105,8 +1105,8 @@ end
is_nothrow(ir::IRCode, pc::Int) = (ir.stmts[pc][:flag] & IR_FLAG_NOTHROW) ≠ 0
function reachable_blocks(cfg::CFG, from_bb, to_bb = nothing)
- worklist = Int[]
- visited = BitSet()
+ worklist = Int[from_bb]
+ visited = BitSet(from_bb)
if to_bb !== nothing
push!(visited, to_bb)
end
@@ -1116,7 +1116,6 @@ function reachable_blocks(cfg::CFG, from_bb, to_bb = nothing)
push!(worklist, bb)
end
end
- visit!(from_bb)
while !isempty(worklist)
foreach(visit!, cfg.blocks[pop!(worklist)].succs)
end
@@ -1148,7 +1147,15 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
bb_insert_idx = finalizer_idx
function note_block_use!(bb, idx)
new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), bb_insert_block, bb)
- bb_insert_idx = new_bb_insert_block == bb_insert_block ? max(idx, bb_insert_idx) : nothing
+ if new_bb_insert_block == bb
+ if bb == bb_insert_block
+ bb_insert_idx = max(bb_insert_idx, idx)
+ else
+ bb_insert_idx = idx
+ end
+ else
+ bb_insert_idx = nothing
+ end
bb_insert_block = new_bb_insert_block
nothing
end
Plus an appropriate adjustment to the other use of reachable_blocks:
- blocks = reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)
+ # Collect all reachable blocks between the finalizer registration and the
+ # insertion point
+ blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
+ reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)