From 8a5fd27fddd7513885172721c95d5ec2f201993a Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 1 Oct 2024 21:29:34 +0900 Subject: [PATCH] optimizer: allow EA-powered `finalizer` inlining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g. this allows `finalizer` inlining in the following case: ```julia mutable struct ForeignBuffer{T} const ptr::Ptr{T} end const foreign_buffer_finalized = Ref(false) function foreign_alloc(::Type{T}, length) where T ptr = Libc.malloc(sizeof(T) * length) ptr = Base.unsafe_convert(Ptr{T}, ptr) obj = ForeignBuffer{T}(ptr) return finalizer(obj) do obj Base.@assume_effects :notaskstate :nothrow foreign_buffer_finalized[] = true Libc.free(obj.ptr) end end function f_EA_finalizer(N::Int) workspace = foreign_alloc(Float64, N) GC.@preserve workspace begin (;ptr) = workspace Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr) end end ``` ```julia julia> @code_typed f_EA_finalizer(42) CodeInfo( 1 ── %1 = Base.mul_int(8, N)::Int64 │ %2 = Core.lshr_int(%1, 63)::Int64 │ %3 = Core.trunc_int(Core.UInt8, %2)::UInt8 │ %4 = Core.eq_int(%3, 0x01)::Bool └─── goto #3 if not %4 2 ── invoke Core.throw_inexacterror(:convert::Symbol, UInt64::Type, %1::Int64)::Union{} └─── unreachable 3 ── goto #4 4 ── %9 = Core.bitcast(Core.UInt64, %1)::UInt64 └─── goto #5 5 ── goto #6 6 ── goto #7 7 ── goto #8 8 ── %14 = $(Expr(:foreigncall, :(:malloc), Ptr{Nothing}, svec(UInt64), 0, :(:ccall), :(%9), :(%9)))::Ptr{Nothing} └─── goto #9 9 ── %16 = Base.bitcast(Ptr{Float64}, %14)::Ptr{Float64} │ %17 = %new(ForeignBuffer{Float64}, %16)::ForeignBuffer{Float64} └─── goto #10 10 ─ %19 = $(Expr(:gc_preserve_begin, :(%17))) │ %20 = Base.getfield(%17, :ptr)::Ptr{Float64} │ invoke Main.println(Main.devnull::Base.DevNull, "ptr = "::String, %20::Ptr{Float64})::Nothing │ $(Expr(:gc_preserve_end, :(%19))) │ %23 = Main.foreign_buffer_finalized::Base.RefValue{Bool} │ Base.setfield!(%23, :x, true)::Bool │ %25 = Base.getfield(%17, :ptr)::Ptr{Float64} │ %26 = Base.bitcast(Ptr{Nothing}, %25)::Ptr{Nothing} │ $(Expr(:foreigncall, :(:free), Nothing, svec(Ptr{Nothing}), 0, :(:ccall), :(%26), :(%25)))::Nothing └─── return nothing ) => Nothing ``` However, this is still a WIP. Before merging, I want to improve EA's precision a bit and at least fix the test case that is currently marked as `broken`. I also need to check its impact on compiler performance. Additionally, I believe this feature is not yet practical. In particular, there is still significant room for improvement in the following areas: - EA's interprocedural capabilities: currently EA is performed ad-hoc for limited frames because of latency reasons, which significantly reduces its precision in the presence of interprocedural calls. - Relaxing the `:nothrow` check for finalizer inlining: the current algorithm requires `:nothrow`-ness on all paths from the allocation of the mutable struct to its last use, which is not practical for real-world cases. Even when `:nothrow` cannot be guaranteed, auxiliary optimizations such as inserting a `finalize` call after the last use might still be possible. --- base/compiler/optimize.jl | 2 +- .../ssair/EscapeAnalysis/EscapeAnalysis.jl | 51 ++++++++---- base/compiler/ssair/passes.jl | 79 +++++++++++++------ base/compiler/types.jl | 2 + test/compiler/EscapeAnalysis/EAUtils.jl | 1 + test/compiler/codegen.jl | 2 +- test/compiler/effects.jl | 2 +- test/compiler/inline.jl | 30 +++++++ 8 files changed, 127 insertions(+), 42 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 1971b47323f5d9..99151420ab8769 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -648,7 +648,7 @@ function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState) if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending) ir = sv.ir nargs = let def = sv.result.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end - estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp)) + estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp)) argescapes = EscapeAnalysis.ArgEscapeCache(estate) stack_analysis_result!(sv.result, argescapes) validate_mutable_arg_escapes!(estate, sv) diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index 6967efe495be10..dae6e2a8b74ac2 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -18,7 +18,7 @@ import ._TOP_MOD: ==, getindex, setindex! using Core: MethodMatch, SimpleVector, ifelse, sizeof using Core.IR using ._TOP_MOD: # Base definitions - @__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, + @__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, @show, @nospecialize, @specialize, BitSet, Callable, Csize_t, IdDict, IdSet, UnitRange, Vector, copy, delete!, empty!, enumerate, error, first, get, get!, haskey, in, isassigned, isempty, ismutabletype, keys, last, length, max, min, missing, pop!, push!, pushfirst!, @@ -657,11 +657,13 @@ function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_e # `escape_exception!` conservatively propagates `AllEscape` anyway, # and so escape information imposed on `:the_exception` isn't computed continue + elseif head === :gc_preserve_begin + # GC preserve is handled by `escape_gc_preserve!` + elseif head === :gc_preserve_end + escape_gc_preserve!(astate, pc, stmt.args) elseif head === :static_parameter || # this exists statically, not interested in its escape - head === :copyast || # XXX can this account for some escapes? - head === :isdefined || # just returns `Bool`, nothing accounts for any escapes - head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere - head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere + head === :copyast || # XXX escape something? + head === :isdefined # just returns `Bool`, nothing accounts for any escapes continue else add_conservative_changes!(astate, pc, stmt.args) @@ -1062,17 +1064,21 @@ end function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) mi = first(args)::MethodInstance first_idx, last_idx = 2, length(args) + add_liveness_changes!(astate, pc, args, first_idx, last_idx) # TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available cache = astate.get_escape_cache(mi) + ret = SSAValue(pc) if cache isa Bool if cache + for (i, argidx) in enumerate(first_idx:last_idx) + add_alias_change!(astate, ret, args[argidx]) + end return nothing # guaranteed to have no escape else return add_conservative_changes!(astate, pc, args, 2) end end cache = cache::ArgEscapeCache - ret = SSAValue(pc) retinfo = astate.estate[ret] # escape information imposed on the call statement method = mi.def::Method nargs = Int(method.nargs) @@ -1160,6 +1166,17 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any}) end end +function escape_gc_preserve!(astate::AnalysisState, pc::Int, args::Vector{Any}) + @assert length(args) == 1 "invalid :gc_preserve_end" + val = args[1] + @assert val isa SSAValue "invalid :gc_preserve_end" + beginstmt = astate.ir[val][:stmt] + @assert isexpr(beginstmt, :gc_preserve_begin) "invalid :gc_preserve_end" + beginargs = beginstmt.args + # COMBAK we might need to add liveness for all statements from `:gc_preserve_begin` to `:gc_preserve_end` + add_liveness_changes!(astate, pc, beginargs) +end + normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) @@ -1185,20 +1202,12 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) if result === missing # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively add_conservative_changes!(astate, pc, args) - return elseif result === true add_liveness_changes!(astate, pc, args, 2) - return # ThrownEscape is already checked + elseif is_nothrow(astate.ir, pc) + add_liveness_changes!(astate, pc, args, 2) else - # we escape statements with the `ThrownEscape` property using the effect-freeness - # computed by `stmt_effect_flags` invoked within inlining - # TODO throwness ≠ "effect-free-ness" - if is_nothrow(astate.ir, pc) - add_liveness_changes!(astate, pc, args, 2) - else - add_fallback_changes!(astate, pc, args, 2) - end - return + add_fallback_changes!(astate, pc, args, 2) end end @@ -1526,4 +1535,12 @@ function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any}) add_liveness_changes!(astate, pc, args, 6) end +function escape_builtin!(::typeof(Core.finalizer), astate::AnalysisState, pc::Int, args::Vector{Any}) + if length(args) ≥ 3 + obj = args[3] + add_liveness_change!(astate, obj, pc) # TODO setup a proper FinalizerEscape? + end + return false +end + end # baremodule EscapeAnalysis diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 3981f7382d7072..8edd3b1d10721e 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1300,7 +1300,13 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) # Inlining performs legality checks on the finalizer to determine # whether or not we may inline it. If so, it appends extra arguments # at the end of the intrinsic. Detect that here. - length(stmt.args) == 5 || continue + if length(stmt.args) == 4 && stmt.args[4] === nothing + # constant case + elseif length(stmt.args) == 5 && stmt.args[4] isa Bool && stmt.args[5] isa MethodInstance + # inlining case + else + continue + end end is_finalizer = true elseif isexpr(stmt, :foreigncall) @@ -1685,7 +1691,32 @@ end function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState}) 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) lazypostdomtree = LazyPostDomtree(ir) + function find_finalizer_idx(defuse::SSADefUse) + finalizer_idx = nothing + for use in defuse.uses + if use.kind === :finalizer + # For now: Only allow one finalizer per allocation + finalizer_idx !== nothing && return false + finalizer_idx = use.idx + end + end + if finalizer_idx === nothing + return true + elseif inlining === nothing + return false + end + return finalizer_idx + end for (defidx, (intermediaries, defuse)) in defuses + # Find the type for this allocation + defexpr = ir[SSAValue(defidx)][:stmt] + isexpr(defexpr, :new) || continue + typ = unwrap_unionall(ir.stmts[defidx][:type]) + # Could still end up here if we tried to setfield! on an immutable, which would + # error at runtime, but is not illegal to have in the IR. + typ = widenconst(typ) + ismutabletype(typ) || continue + typ = typ::DataType # Check if there are any uses we did not account for. If so, the variable # escapes and we cannot eliminate the allocation. This works, because we're guaranteed # not to include any intermediaries that have dead uses. As a result, missing uses will only ever @@ -1696,29 +1727,33 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}} nuses += used_ssas[iidx] end nuses_total = used_ssas[defidx] + nuses - length(intermediaries) - nleaves == nuses_total || continue - # Find the type for this allocation - defexpr = ir[SSAValue(defidx)][:stmt] - isexpr(defexpr, :new) || continue - typ = unwrap_unionall(ir.stmts[defidx][:type]) - # Could still end up here if we tried to setfield! on an immutable, which would - # error at runtime, but is not illegal to have in the IR. - typ = widenconst(typ) - ismutabletype(typ) || continue - typ = typ::DataType - # First check for any finalizer calls - finalizer_idx = nothing - for use in defuse.uses - if use.kind === :finalizer - # For now: Only allow one finalizer per allocation - finalizer_idx !== nothing && @goto skip - finalizer_idx = use.idx + if nleaves ≠ nuses_total + finalizer_idx = find_finalizer_idx(defuse) + if finalizer_idx isa Int + nargs = length(ir.argtypes) # TODO + estate = EscapeAnalysis.analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache(inlining.interp)) + einfo = estate[SSAValue(defidx)] + if EscapeAnalysis.has_no_escape(einfo) + already = BitSet(use.idx for use in defuse.uses) + for idx = einfo.Liveness + if idx ∉ already + push!(defuse.uses, SSAUse(:EALiveness, idx)) + end + end + try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) + end end - end - if finalizer_idx !== nothing && inlining !== nothing - try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining, - lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) continue + else + finalizer_idx = find_finalizer_idx(defuse) + if finalizer_idx isa Int + try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) + continue + elseif !finalizer_idx + continue + end end # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] diff --git a/base/compiler/types.jl b/base/compiler/types.jl index c51785f23ea29d..2cc3a13c49ffc1 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -452,6 +452,8 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance +get_escape_cache(interp::AbstractInterpreter) = GetNativeEscapeCache(interp) + abstract type CallInfo end @nospecialize diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl index 188ec93ebc5bec..bd0a0cb003a214 100644 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ b/test/compiler/EscapeAnalysis/EAUtils.jl @@ -115,6 +115,7 @@ CC.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params CC.get_inference_world(interp::EscapeAnalyzer) = interp.world CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache CC.cache_owner(::EscapeAnalyzer) = EAToken() +CC.get_escape_cache(interp::EscapeAnalyzer) = GetEscapeCache(interp) function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult) # run EA on all frames that have been optimized diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 26ae965b353197..ae042509645549 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -266,7 +266,7 @@ if opt_level > 0 @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir) end -# Issue 22770 +# Issue JuliaLang/julia#22770 let was_gced = false @noinline make_tuple(x) = tuple(x) @noinline use(x) = ccall(:jl_breakpoint, Cvoid, ()) diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl index 8bc5f27e317666..c286658ecd5755 100644 --- a/test/compiler/effects.jl +++ b/test/compiler/effects.jl @@ -1135,7 +1135,7 @@ end # These need EA @test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},))) -@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},))) +@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},))) @test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},))) @test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},))) @test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},))) diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index fceb9203524826..c482c5d3320958 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -2223,3 +2223,33 @@ let src = code_typed1(bar_split_error, Tuple{}) @test count(iscall((src, foo_split)), src.code) == 0 @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 end + +# finalizer inlining with EA +mutable struct ForeignBuffer{T} + const ptr::Ptr{T} +end +const foreign_buffer_finalized = Ref(false) +function foreign_alloc(::Type{T}, length) where T + ptr = Libc.malloc(sizeof(T) * length) + ptr = Base.unsafe_convert(Ptr{T}, ptr) + obj = ForeignBuffer{T}(ptr) + return finalizer(obj) do obj + Base.@assume_effects :notaskstate :nothrow + foreign_buffer_finalized[] = true + Libc.free(obj.ptr) + end +end +function f_EA_finalizer(N::Int) + workspace = foreign_alloc(Float64, N) + GC.@preserve workspace begin + (;ptr) = workspace + Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr) + end +end +let src = code_typed1(f_EA_finalizer, (Int,)) + @test count(iscall((src, Core.finalizer)), src.code) == 0 +end +let;Base.Experimental.@force_compile + f_EA_finalizer(42000) + @test foreign_buffer_finalized[] +end