julia inlining: relax finalizer inlining control-flow restriction

Eager finalizer inlining (#45272) currently has a restriction that requires all the def/uses to be in a same basic block.

This commit relaxes that restriction a bit by allowing def/uses to involve control flow when all of them are dominated by a finalizer call to be inlined, since in that case it is safe to insert the body of finalizer at the end of all the def/uses, e.g.

const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
mutable struct DoAllocWithFieldInter
    x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
    finalizer(obj) do this
        add_finalization_count!(this.x)
    end
end

function cfg_finalization3(io)
    for i = -999:1000
        o = DoAllocWithFieldInter(i)
        register_finalizer!(o)
        if i == 1000
            safeprint(io, o.x, '\n')
        elseif i > 0
            safeprint(io, o.x)
        end
    end
end
let src = code_typed1(cfg_finalization3, (IO,))
    @test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
    init_finalization_count!()
    cfg_finalization3(IOBuffer())
    @test get_finalization_count() == 1000
end

There is a room for further improvement though -- if we have a way to compute the post-domination, we can also inline a finalizer call in a case when a finalizer block post-dominateds all the def/uses e.g.

function cfg_finalization5(io)
    for i = -999:1000
        o = DoAllocWithFieldInter(i)
        if i == 1000
            safeprint(io, o.x, '\n')
        elseif i > 0
            safeprint(io, o.x)
        end
        register_finalizer!(o)
    end
end
let src = code_typed1(cfg_finalization5, (IO,))
    # TODO we can fix this case by checking a case when a finalizer block is post-dominated by all the def/uses
    @test_broken count(isinvoke(:nothrow_side_effect), src.code) == 1
end

Sep 06 '22 15:09 aviatesk

With this restriction lifted, it's no longer sufficient to just inline the finalizer after the numerically last SSA value that uses it. Something needs to ensure the finalizer happens after the last use on any path to the exit:

julia> @noinline nothrow_side_effect(x) =
           Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid
nothrow_side_effect (generic function with 1 method)

julia> mutable struct DoAllocWithField
           x
           function DoAllocWithField(x)
               finalizer(new(x)) do this
                   nothrow_side_effect(this.x)
               end
           end
       end

julia> function foo(b::Bool)
           s = DoAllocWithField(1)
           if b
               nothrow_side_effect(s.x)
               return 1
           else
               return 0
           end
       end
foo (generic function with 1 method)

julia> code_typed(foo, Tuple{Bool})
1-element Vector{Any}:
 CodeInfo(
1 ─ %1 = %new(Main.:(var"#3#4"))::var"#3#4"
│   %2 = %new(Main.DoAllocWithField, 1)::DoAllocWithField
└──      goto #3 if not b
2 ─ %4 = Base.getfield(%2, :x)::Any
│        invoke %1(%2::DoAllocWithField)::Nothing
│        Main.nothrow_side_effect(%4)::Any
└──      return 1
3 ─      return 0
) => Int64

julia> foo(true)
1
1
1

julia> foo(false)
0

Sep 07 '22 08:09 Keno

Just FYI, multiple windows buildbots hardlocked on this PR, and I had to manually reboot them. I'm not sure if that's a coincidence or not, but you can see the builds here:

https://build.julialang.org/#/builders/29/builds/10109
https://build.julialang.org/#/builders/72/builds/10250
https://build.julialang.org/#/builders/72/builds/10236

So perhaps something on this branch causes things to jam up in a bad way? Of course, it is a failing of the current windows buildbots that they do not more forcefully kill the windows processes and get stuck like this, but that's something we are working to fix in the Buildkite era.

Sep 08 '22 18:09 staticfloat

I've implemented the post-dominator analysis and updated this. I think this should be reasonably complete now. @aviatesk take a look, please.

Sep 21 '22 03:09 Keno

Looks great, thanks sooo much for implementing the post-domination analysis, that is very exciting itself. I think there seems to be a problem in finalizer insertion point though, MRE would be:

const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,)))

mutable struct DoAllocWithFieldInter
    x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
    finalizer(obj) do this
        add_finalization_count!(this.x)
    end
end

function cfg_finalization6(io)
    for i = -999:1000
        o = DoAllocWithFieldInter(0)
        register_finalizer!(o)
        if i == 1000
            o.x = i # with `setfield!`
        elseif i > 0
            safeprint(io, o.x, '\n')
        end
        # <= shouldn't the finalizer be inlined here?
    end
end
let src = code_typed1(cfg_finalization6, (IO,))
    @test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
    init_finalization_count!()
    cfg_finalization6(IOBuffer())
    @test get_finalization_count() == 1000 # this fails
end

Currently the finalizer is inlined after the allocation site, so can't observe the field value changed by setfield!:

julia> src = code_typed1(cfg_finalization6, (IO,))
CodeInfo(
1 ──       goto #11 if not true
2 ┄─ %2  = φ (#1 => -999, #10 => %20)::Int64
│    %3  = φ (#1 => -999, #10 => %21)::Int64
│    %4  = %new(Main.DoAllocWithFieldInter, 0)::DoAllocWithFieldInter
│    %5  = Base.getfield(%4, :x)::Int64
│          invoke Main.add_finalization_count!(%5::Int64)::Int64
│    %7  = (%2 === 1000)::Bool
└───       goto #4 if not %7
3 ──       Base.setfield!(%4, :x, %2)::Int64
└───       goto #6
4 ── %11 = Base.slt_int(0, %2)::Bool
└───       goto #6 if not %11
5 ── %13 = Base.getfield(%4, :x)::Int64
└───       invoke Main.safeprint(io::IO, %13::Any, '\n'::Vararg{Any})::Any
6 ┄─ %15 = (%3 === 1000)::Bool
└───       goto #8 if not %15
7 ──       goto #9
8 ── %18 = Base.add_int(%3, 1)::Int64
└───       goto #9
9 ┄─ %20 = φ (#8 => %18)::Int64
│    %21 = φ (#8 => %18)::Int64
│    %22 = φ (#7 => true, #8 => false)::Bool
│    %23 = Base.not_int(%22)::Bool
└───       goto #11 if not %23
10 ─       goto #2
11 ┄       return nothing
)

Sep 21 '22 09:09 aviatesk

I think there seems to be a problem in finalizer insertion point though, MRE would be:

Should be fixed by the following, if you want to apply that and add the test

diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 00577526ac..1e9d616a96 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1105,8 +1105,8 @@ end
 is_nothrow(ir::IRCode, pc::Int) = (ir.stmts[pc][:flag] & IR_FLAG_NOTHROW) ≠ 0

 function reachable_blocks(cfg::CFG, from_bb, to_bb = nothing)
-    worklist = Int[]
-    visited = BitSet()
+    worklist = Int[from_bb]
+    visited = BitSet(from_bb)
     if to_bb !== nothing
         push!(visited, to_bb)
     end
@@ -1116,7 +1116,6 @@ function reachable_blocks(cfg::CFG, from_bb, to_bb = nothing)
             push!(worklist, bb)
         end
     end
-    visit!(from_bb)
     while !isempty(worklist)
         foreach(visit!, cfg.blocks[pop!(worklist)].succs)
     end
@@ -1148,7 +1147,15 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
     bb_insert_idx = finalizer_idx
     function note_block_use!(bb, idx)
         new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), bb_insert_block, bb)
-        bb_insert_idx = new_bb_insert_block == bb_insert_block ? max(idx, bb_insert_idx) : nothing
+        if new_bb_insert_block == bb
+            if bb == bb_insert_block
+                bb_insert_idx = max(bb_insert_idx, idx)
+            else
+                bb_insert_idx = idx
+            end
+        else
+            bb_insert_idx = nothing
+        end
         bb_insert_block = new_bb_insert_block
         nothing
     end

Plus an appropriate adjustment to the other use of reachable_blocks:

-         blocks = reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)
+        # Collect all reachable blocks between the finalizer registration and the
+        # insertion point
+        blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
+            reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)

Sep 21 '22 10:09 Keno

julia julia copied to clipboard

inlining: relax finalizer inlining control-flow restriction

julia
julia copied to clipboard