From 9c70320fdf4fca7c400cd46abcf71c171a1b2b8a Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 23 Jun 2021 20:22:15 +0900 Subject: [PATCH] optimizer: supports callsite annotations of inlining, fixes #18773 Enable `@inline`/`@noinline` annotations on function callsites. From #40754. Now `@inline` and `@noinline` can be applied to a code block and then the compiler will try to (not) inline calls within the block: ```julia @inline f(...) # The compiler will try to inline `f` @inline f(...) + g(...) # The compiler will try to inline `f`, `g` and `+` @inline f(args...) = ... # Of course annotations on a definition are still allowed ``` Here are a couple of notes on how those callsite annotations will work: - a callsite annotation always takes precedence over the annotation applied to the definition of the called function, regardless of whether `@inline` or `@noinline` is used: ```julia @inline function explicit_inline(args...) # body end let @noinline explicit_inline(args...) # this call will not be inlined end ``` - when callsite annotations are nested, the innermost annotation takes precedence: ```julia @noinline let a0, b0 = ... a = @inline f(a0) # the compiler will try to inline this call b = notinlined(b0) # the compiler will NOT try to inline this call return a, b end ``` Both behaviors are tested and included in the documentation. 
--- base/compiler/abstractinterpretation.jl | 11 +- base/compiler/optimize.jl | 59 +++++++-- base/compiler/ssair/inlining.jl | 60 +++++----- base/compiler/typeinfer.jl | 2 +- base/compiler/types.jl | 1 - base/compiler/utilities.jl | 5 +- base/compiler/validation.jl | 4 +- base/expr.jl | 130 ++++++++++++++++++-- base/meta.jl | 3 +- src/ast.scm | 2 +- src/codegen.cpp | 4 +- src/interpreter.c | 2 +- src/julia-syntax.scm | 6 +- src/macroexpand.scm | 2 +- src/method.c | 3 +- test/compiler/inline.jl | 151 +++++++++++++++++++++--- 16 files changed, 358 insertions(+), 87 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 4af4eb672bb6d..25cb2f1187538 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -595,7 +595,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me return nothing end mi = mi::MethodInstance - if !force && !const_prop_methodinstance_heuristic(interp, method, mi) + if !force && !const_prop_methodinstance_heuristic(interp, match, mi) add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") return nothing end @@ -699,7 +699,8 @@ end # This is a heuristic to avoid trying to const prop through complicated functions # where we would spend a lot of time, but are probably unlikely to get an improved # result anyway. -function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance) +function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance) + method = match.method if method.is_for_opaque_closure # Not inlining an opaque closure can be very expensive, so be generous # with the const-prop-ability. 
It is quite possible that we can't infer @@ -717,7 +718,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method if isdefined(code, :inferred) && !cache_inlineable cache_inf = code.inferred if !(cache_inf === nothing) - cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing + # TODO maybe we want to respect callsite `@inline`/`@noinline` annotations here ? + cache_inlineable = inlining_policy(interp, cache_inf, 0x00, match) !== nothing end end if !cache_inlineable @@ -1896,7 +1898,8 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) if isa(fname, SlotNumber) changes = StateUpdate(fname, VarState(Any, false), changes, false) end - elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect + elseif hd === :code_coverage_effect || + (hd !== :boundscheck && hd !== nothing && is_meta_expr_head(hd)) # :boundscheck can be narrowed to Bool # these do not generate code else t = abstract_eval_statement(interp, stmt, changes, frame) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 1898aa8b75778..3b7b437ce6c6d 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -21,21 +21,30 @@ function push!(et::EdgeTracker, ci::CodeInstance) push!(et, ci.def) end -struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P} +struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter} params::OptimizationParams et::S mi_cache::T - policy::P + interp::I end -function default_inlining_policy(@nospecialize(src)) +function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, match::Union{MethodMatch,InferenceResult}) if isa(src, CodeInfo) || isa(src, Vector{UInt8}) src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) - src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) + src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) return 
src_inferred && src_inlineable ? src : nothing - end - if isa(src, OptimizationState) && isdefined(src, :ir) - return src.src.inlineable ? src.ir : nothing + elseif isa(src, OptimizationState) && isdefined(src, :ir) + return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing + elseif src === nothing && is_stmt_inline(stmt_flag) && isa(match, MethodMatch) + # when the source isn't available at this moment, try to re-infer and inline it + # NOTE we can make inference try to keep the source if the call is going to be inlined, + # but then inlining will depend on local state of inference and so the first entry + # and the succeeding ones may generate different code; rather we always re-infer + # the source to avoid the problem while it's obviously not most efficient + # HACK disable inlining for the re-inference to avoid cycles by making sure the following inference never comes here again + interp = NativeInterpreter(get_world_counter(interp); opt_params = OptimizationParams(; inlining = false)) + src, rt = typeinf_code(interp, match.method, match.spec_types, match.sparams, true) + return src end return nothing end @@ -57,7 +66,7 @@ mutable struct OptimizationState inlining = InliningState(params, EdgeTracker(s_edges, frame.valid_worlds), WorldView(code_cache(interp), frame.world), - inlining_policy(interp)) + interp) return new(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod, frame.sptypes, frame.slottypes, false, @@ -86,7 +95,7 @@ mutable struct OptimizationState inlining = InliningState(params, nothing, WorldView(code_cache(interp), get_world_counter()), - inlining_policy(interp)) + interp) return new(linfo, src, nothing, stmt_info, mod, sptypes_from_meth_instance(linfo), slottypes, false, @@ -128,6 +137,10 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError # This statement was marked as @inbounds by the user. 
If replaced by inlining, # any contained boundschecks may be removed const IR_FLAG_INBOUNDS = 0x01 +# This statement was marked as @inline by the user +const IR_FLAG_INLINE = 0x01 << 1 +# This statement was marked as @noinline by the user +const IR_FLAG_NOINLINE = 0x01 << 2 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. const IR_FLAG_EFFECT_FREE = 0x01 << 4 @@ -173,6 +186,9 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara return inlineable end +is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE != 0 +is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE != 0 + # These affect control flow within the function (so may not be removed # if there is no usage within the function), but don't affect the purity # of the function as a whole. @@ -359,6 +375,7 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: renumber_ir_elements!(code, changemap, labelmap) inbounds_depth = 0 # Number of stacked inbounds + inline_flags = BitVector() meta = Any[] flags = fill(0x00, length(code)) for i = 1:length(code) @@ -373,16 +390,38 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: inbounds_depth -= 1 end stmt = nothing + elseif isexpr(stmt, :inline) + if stmt.args[1]::Bool + push!(inline_flags, true) + else + pop!(inline_flags) + end + stmt = nothing + elseif isexpr(stmt, :noinline) + if stmt.args[1]::Bool + push!(inline_flags, false) + else + pop!(inline_flags) + end + stmt = nothing else stmt = normalize(stmt, meta) end code[i] = stmt - if !(stmt === nothing) + if stmt !== nothing if inbounds_depth > 0 flags[i] |= IR_FLAG_INBOUNDS end + if !isempty(inline_flags) + if last(inline_flags) + flags[i] |= IR_FLAG_INLINE + else + flags[i] |= IR_FLAG_NOINLINE + end + end end end + @assert isempty(inline_flags) "malformed meta flags" strip_trailing_junk!(ci, code, stmtinfo, flags) cfg = 
compute_basic_blocks(code) types = Any[] diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 7e7baff136741..420ec1a8998e3 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -604,6 +604,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: argexprs::Vector{Any}, atypes::Vector{Any}, arginfos::Vector{Any}, arg_start::Int, istate::InliningState) + flag = ir.stmts[idx][:flag] new_argexprs = Any[argexprs[arg_start]] new_atypes = Any[atypes[arg_start]] # loop over original arguments and flatten any known iterators @@ -659,8 +660,9 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: info = call.info handled = false if isa(info, ConstCallInfo) - if maybe_handle_const_call!(ir, state1.id, new_stmt, info, new_sig, - call.rt, istate, false, todo) + if !is_stmt_noinline(flag) && maybe_handle_const_call!( + ir, state1.id, new_stmt, info, new_sig,call.rt, istate, flag, false, todo) + handled = true else info = info.call @@ -671,7 +673,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: MethodMatchInfo[info] : info.matches # See if we can inline this call to `iterate` analyze_single_call!(ir, todo, state1.id, new_stmt, - new_sig, call.rt, info, istate) + new_sig, call.rt, info, istate, flag) end if i != length(thisarginfo.each) valT = getfield_tfunc(call.rt, Const(1)) @@ -719,7 +721,7 @@ function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo):: return mi end -function resolve_todo(todo::InliningTodo, state::InliningState) +function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) (; match) = todo.spec::DelayedInliningSpec #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) @@ -755,9 +757,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState) return ConstantCase(src) end - if src !== nothing - src = state.policy(src) - end + src = 
inlining_policy(state.interp, src, flag, match) if src === nothing return compileable_specialization(et, match) @@ -771,9 +771,9 @@ function resolve_todo(todo::InliningTodo, state::InliningState) return InliningTodo(todo.mi, src) end -function resolve_todo(todo::UnionSplit, state::InliningState) +function resolve_todo(todo::UnionSplit, state::InliningState, flag::UInt8) UnionSplit(todo.fully_covered, todo.atype, - Pair{Any,Any}[sig=>resolve_todo(item, state) for (sig, item) in todo.cases]) + Pair{Any,Any}[sig=>resolve_todo(item, state, flag) for (sig, item) in todo.cases]) end function validate_sparams(sparams::SimpleVector) @@ -784,7 +784,7 @@ function validate_sparams(sparams::SimpleVector) end function analyze_method!(match::MethodMatch, atypes::Vector{Any}, - state::InliningState, @nospecialize(stmttyp)) + state::InliningState, @nospecialize(stmttyp), flag::UInt8) method = match.method methsig = method.sig @@ -804,7 +804,7 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any}, et = state.et - if !state.params.inlining + if !state.params.inlining || is_stmt_noinline(flag) return compileable_specialization(et, match) end @@ -818,7 +818,7 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any}, # If we don't have caches here, delay resolving this MethodInstance # until the batch inlining step (or an external post-processing pass) state.mi_cache === nothing && return todo - return resolve_todo(todo, state) + return resolve_todo(todo, state, flag) end function InliningTodo(mi::MethodInstance, ir::IRCode) @@ -1043,7 +1043,7 @@ is_builtin(s::Signature) = s.ft ⊑ Builtin function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, result)::InvokeCallInfo, - state::InliningState, todo::Vector{Pair{Int, Any}}) + state::InliningState, todo::Vector{Pair{Int, Any}}, flag::UInt8) stmt = ir.stmts[idx][:inst] calltype = ir.stmts[idx][:type] @@ -1057,17 +1057,17 @@ function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, 
result): atypes = atypes[4:end] pushfirst!(atypes, atype0) - if isa(result, InferenceResult) + if isa(result, InferenceResult) && !is_stmt_noinline(flag) (; mi) = item = InliningTodo(result, atypes, calltype) validate_sparams(mi.sparam_vals) || return nothing if argtypes_to_type(atypes) <: mi.def.sig - state.mi_cache !== nothing && (item = resolve_todo(item, state)) + state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) handle_single_case!(ir, stmt, idx, item, true, todo) return nothing end end - result = analyze_method!(match, atypes, state, calltype) + result = analyze_method!(match, atypes, state, calltype, flag) handle_single_case!(ir, stmt, idx, result, true, todo) return nothing end @@ -1162,7 +1162,7 @@ end function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, @nospecialize(stmt), sig::Signature, @nospecialize(calltype), infos::Vector{MethodMatchInfo}, - state::InliningState) + state::InliningState, flag::UInt8) cases = Pair{Any, Any}[] signature_union = Union{} only_method = nothing # keep track of whether there is one matching method @@ -1196,7 +1196,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int fully_covered = false continue end - case = analyze_method!(match, sig.atypes, state, calltype) + case = analyze_method!(match, sig.atypes, state, calltype, flag) if case === nothing fully_covered = false continue @@ -1223,7 +1223,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int match = meth[1] end fully_covered = true - case = analyze_method!(match, sig.atypes, state, calltype) + case = analyze_method!(match, sig.atypes, state, calltype, flag) case === nothing && return push!(cases, Pair{Any,Any}(match.spec_types, case)) end @@ -1245,7 +1245,7 @@ end function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr, info::ConstCallInfo, sig::Signature, @nospecialize(calltype), - state::InliningState, + state::InliningState, flag::UInt8, 
isinvoke::Bool, todo::Vector{Pair{Int, Any}}) # when multiple matches are found, bail out and later inliner will union-split this signature # TODO effectively use multiple constant analysis results here @@ -1257,7 +1257,7 @@ function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr, validate_sparams(mi.sparam_vals) || return true mthd_sig = mi.def.sig mistypes = mi.specTypes - state.mi_cache !== nothing && (item = resolve_todo(item, state)) + state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) if sig.atype <: mthd_sig handle_single_case!(ir, stmt, idx, item, isinvoke, todo) return true @@ -1295,6 +1295,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) info = info.info end + flag = ir.stmts[idx][:flag] + # Inference determined this couldn't be analyzed. Don't question it. if info === false continue @@ -1304,7 +1306,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) # it'll have performed a specialized analysis for just this case. Use its # result. 
if isa(info, ConstCallInfo) - if maybe_handle_const_call!(ir, idx, stmt, info, sig, calltype, state, sig.f === Core.invoke, todo) + if !is_stmt_noinline(flag) && maybe_handle_const_call!( + ir, idx, stmt, info, sig, calltype, state, flag, sig.f === Core.invoke, todo) continue else info = info.call @@ -1312,7 +1315,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) end if isa(info, OpaqueClosureCallInfo) - result = analyze_method!(info.match, sig.atypes, state, calltype) + result = analyze_method!(info.match, sig.atypes, state, calltype, flag) handle_single_case!(ir, stmt, idx, result, false, todo) continue end @@ -1320,7 +1323,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) # Handle invoke if sig.f === Core.invoke if isa(info, InvokeCallInfo) - inline_invoke!(ir, idx, sig, info, state, todo) + inline_invoke!(ir, idx, sig, info, state, todo, flag) end continue end @@ -1334,7 +1337,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) continue end - analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state) + analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state, flag) end todo end @@ -1399,7 +1402,8 @@ end function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::Expr, params::OptimizationParams) f, ft, atypes = sig.f, sig.ft, sig.atypes typ = ir.stmts[idx][:type] - if params.inlining && length(atypes) == 3 && istopfunction(f, :!==) + isinlining = params.inlining + if isinlining && length(atypes) == 3 && istopfunction(f, :!==) # special-case inliner for !== that precedes _methods_by_ftype union splitting # and that works, even though inference generally avoids inferring the `!==` Method if isa(typ, Const) @@ -1411,7 +1415,7 @@ function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::E not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) ir[SSAValue(idx)] = not_call return true - elseif params.inlining && length(atypes) 
== 3 && istopfunction(f, :(>:)) + elseif isinlining && length(atypes) == 3 && istopfunction(f, :(>:)) # special-case inliner for issupertype # that works, even though inference generally avoids inferring the `>:` Method if isa(typ, Const) && _builtin_nothrow(<:, Any[atypes[3], atypes[2]], typ) @@ -1421,7 +1425,7 @@ function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::E subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2]) ir[SSAValue(idx)] = subtype_call return true - elseif params.inlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol) + elseif isinlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol) ir[SSAValue(idx)] = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2], length(stmt.args) < 4 ? Bottom : stmt.args[3], length(stmt.args) == 2 ? Any : stmt.args[end]) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index a3895c380f8bd..210782567190f 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -343,7 +343,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta nslots = length(ci.slotflags) resize!(ci.slottypes::Vector{Any}, nslots) resize!(ci.slotnames, nslots) - return ccall(:jl_compress_ir, Any, (Any, Any), def, ci) + return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci) else return ci end diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 1a89d5e994b15..c4c8158eb489c 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -218,7 +218,6 @@ may_discard_trees(::AbstractInterpreter) = true verbose_stmt_info(::AbstractInterpreter) = false method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp)) -inlining_policy(::AbstractInterpreter) = default_inlining_policy """ By default `AbstractInterpreter` implements the following inference bail out logic: diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 
ed09d5316473a..26e25126c393c 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -59,7 +59,8 @@ end # Meta expression head, these generally can't be deleted even when they are # in a dead branch but can be ignored when analyzing uses/liveness. -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo) +is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || + head === :loopinfo || head === :inline || head === :noinline) sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 @@ -196,7 +197,7 @@ function specialize_method(method::Method, @nospecialize(atypes), sparams::Simpl if preexisting # check cached specializations # for an existing result stored there - return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes) + return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)::Union{Nothing,MethodInstance} end return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atypes, sparams) end diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl index c152dfb9fa6a5..6e05c96cd7936 100644 --- a/base/compiler/validation.jl +++ b/base/compiler/validation.jl @@ -16,6 +16,8 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}( :leave => 1:1, :pop_exception => 1:1, :inbounds => 1:1, + :inline => 1:1, + :noinline => 1:1, :boundscheck => 0:0, :copyast => 1:1, :meta => 0:typemax(Int), @@ -141,7 +143,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_ head === :const || head === :enter || head === :leave || head === :pop_exception || head === :method || head === :global || head === :static_parameter || head === :new || head === :splatnew || head === :thunk || head === :loopinfo || - head === :throw_undef_if_not || head === :code_coverage_effect + head === :throw_undef_if_not || head === 
:code_coverage_effect || head === :inline || head === :noinline validate_val!(x) else # TODO: nothing is actually in statement position anymore diff --git a/base/expr.jl b/base/expr.jl index f583a58f78dc5..226dfe5f900a9 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -209,9 +209,51 @@ end !!! compat "Julia 1.8" The usage within a function body requires at least Julia 1.8. + +--- + @inline block + +Give a hint to the compiler that calls within `block` are worth inlining. + +```julia +# The compiler will try to inline `f` +@inline f(...) + +# The compiler will try to inline `f`, `g` and `+` +@inline f(...) + g(...) +``` + +!!! note + A callsite annotation always has the precedence over the annotation applied to the + definition of the called function: + ```julia + @noinline function explicit_noinline(args...) + # body + end + + let + @inline explicit_noinline(args...) # will be inlined + end + ``` + +!!! note + When there are nested callsite annotations, the innermost annotation has the precedence: + ```julia + @noinline let a0, b0 = ... + a = @inline f(a0) # the compiler will try to inline this call + b = f(b0) # the compiler will NOT try to inline this call + return a, b + end + ``` + +!!! compat "Julia 1.8" + The callsite annotation requires at least Julia 1.8. """ -macro inline(ex) - esc(isa(ex, Expr) ? pushmeta!(ex, :inline) : ex) +macro inline(x) + return annotate_meta_def_or_block(x, :inline) +end +macro inline() + return Expr(:meta, :inline) end """ @@ -244,11 +286,54 @@ end !!! compat "Julia 1.8" The usage within a function body requires at least Julia 1.8. +--- + @noinline block + +Give a hint to the compiler that it should not inline the calls within `block`. + +```julia +# The compiler will try to not inline `f` +@noinline f(...) + +# The compiler will try to not inline `f`, `g` and `+` +@noinline f(...) + g(...) +``` + +!!! 
note + A callsite annotation always has the precedence over the annotation applied to the + definition of the called function: + ```julia + @inline function explicit_inline(args...) + # body + end + + let + @noinline explicit_inline(args...) # will not be inlined + end + ``` + +!!! note + When there are nested callsite annotations, the innermost annotation has the precedence: + ```julia + @inline let a0, b0 = ... + a = @noinline f(a0) # the compiler will NOT try to inline this call + b = f(b0) # the compiler will try to inline this call + return a, b + end + ``` + +!!! compat "Julia 1.8" + The callsite annotation requires at least Julia 1.8. + +--- !!! note If the function is trivial (for example returning a constant) it might get inlined anyway. """ -macro noinline(ex) - esc(isa(ex, Expr) ? pushmeta!(ex, :noinline) : ex) +macro noinline(x) + return annotate_meta_def_or_block(x, :noinline) +end +macro noinline() + return Expr(:meta, :noinline) end """ @@ -301,6 +386,15 @@ end ## some macro utilities ## +unwrap_macrocalls(@nospecialize(x)) = x +function unwrap_macrocalls(ex::Expr) + inner = ex + while inner.head === :macrocall + inner = inner.args[end]::Expr + end + return inner +end + function pushmeta!(ex::Expr, sym::Symbol, args::Any...) if isempty(args) tag = sym @@ -308,10 +402,7 @@ function pushmeta!(ex::Expr, sym::Symbol, args::Any...) 
tag = Expr(sym, args...)::Expr end - inner = ex - while inner.head === :macrocall - inner = inner.args[end]::Expr - end + inner = unwrap_macrocalls(ex) idx, exargs = findmeta(inner) if idx != 0 @@ -361,8 +452,23 @@ function findmetaarg(metaargs, sym) return 0 end -function is_short_function_def(ex) - ex.head === :(=) || return false +function annotate_meta_def_or_block(@nospecialize(ex), meta::Symbol) + inner = unwrap_macrocalls(ex) + if is_function_def(inner) + # annotation on a definition + return esc(pushmeta!(ex, meta)) + else + # annotation on a block + return Expr(:block, + Expr(meta, true), + Expr(:local, Expr(:(=), :val, esc(ex))), + Expr(meta, false), + :val) + end +end + +function is_short_function_def(@nospecialize(ex)) + isexpr(ex, :(=)) || return false while length(ex.args) >= 1 && isa(ex.args[1], Expr) (ex.args[1].head === :call) && return true (ex.args[1].head === :where || ex.args[1].head === :(::)) || return false @@ -370,9 +476,11 @@ function is_short_function_def(ex) end return false end +is_function_def(@nospecialize(ex)) = + return isexpr(ex, :function) || is_short_function_def(ex) || isexpr(ex, :->) function findmeta(ex::Expr) - if ex.head === :function || is_short_function_def(ex) || ex.head === :-> + if is_function_def(ex) body = ex.args[2]::Expr body.head === :block || error(body, " is not a block expression") return findmeta_block(ex.args) diff --git a/base/meta.jl b/base/meta.jl index b483630a92f8f..3fe815cd0cbc0 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -450,6 +450,7 @@ end _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals) -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo) +is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta + || head === :loopinfo || head === :inline || head === :noinline) end # module diff --git a/src/ast.scm b/src/ast.scm 
index bc8d847279fc9..e5148a507a4fd 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -289,7 +289,7 @@ ;; predicates and accessors (define (quoted? e) - (memq (car e) '(quote top core globalref outerref line break inert meta inbounds loopinfo))) + (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo))) (define (quotify e) `',e) (define (unquote e) (if (and (pair? e) (memq (car e) '(quote inert))) diff --git a/src/codegen.cpp b/src/codegen.cpp index 744d29629b0ca..69a697ceb096e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4424,7 +4424,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); jl_sym_t *head = ex->head; if (head == meta_sym || head == inbounds_sym || head == coverageeffect_sym - || head == aliasscope_sym || head == popaliasscope_sym) { + || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { // some expression types are metadata and can be ignored // in statement position return; @@ -4859,7 +4859,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval) } else if (head == leave_sym || head == coverageeffect_sym || head == pop_exception_sym || head == enter_sym || head == inbounds_sym - || head == aliasscope_sym || head == popaliasscope_sym) { + || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { jl_errorf("Expr(:%s) in value position", jl_symbol_name(head)); } else if (head == boundscheck_sym) { diff --git a/src/interpreter.c b/src/interpreter.c index 4686efe054edf..f999542d68c4f 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -312,7 +312,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) return jl_true; } else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym || - head == aliasscope_sym || head == 
popaliasscope_sym) { + head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { return jl_nothing; } else if (head == gc_preserve_begin_sym || head == gc_preserve_end_sym) { diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index f00ea0c9ba6d9..428b0513b7e52 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -3498,7 +3498,7 @@ f(x) = yt(x) thunk with-static-parameters toplevel-only global globalref outerref const-if-global thismodule const atomic null true false ssavalue isdefined toplevel module lambda - error gc_preserve_begin gc_preserve_end import using export))) + error gc_preserve_begin gc_preserve_end import using export inline noinline))) (define (local-in? s lam) (or (assq s (car (lam:vinfo lam))) @@ -4592,7 +4592,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope) + ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! current-loc e) @@ -4737,7 +4737,7 @@ f(x) = yt(x) (begin (set! linetable (cons (make-lineinfo name file line) linetable)) (set! current-loc 1))) (if (or reachable - (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope)))) + (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline)))) (begin (set! code (cons e code)) (set! i (+ i 1)) (set! 
locs (cons current-loc locs))))) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 5e55c7bbb29c1..f17f4d3510dc6 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -352,7 +352,7 @@ ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)))) (else `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg)))))) - ((using import export meta line inbounds boundscheck loopinfo) (map unescape e)) + ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e)) ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted. ((symboliclabel) e) ((symbolicgoto) e) diff --git a/src/method.c b/src/method.c index 22145a4349853..df8bef90852cf 100644 --- a/src/method.c +++ b/src/method.c @@ -84,7 +84,8 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve e->head == quote_sym || e->head == inert_sym || e->head == meta_sym || e->head == inbounds_sym || e->head == boundscheck_sym || e->head == loopinfo_sym || - e->head == aliasscope_sym || e->head == popaliasscope_sym) { + e->head == aliasscope_sym || e->head == popaliasscope_sym || + e->head == inline_sym || e->head == noinline_sym) { // ignore these } else { diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 9358dce3325f7..e788b7e56100e 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -389,7 +389,7 @@ function isinvoke(@nospecialize(x), pred) end return false end -code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...))::Core.CodeInfo +code_typed1(args...; kwargs...) = (first(only(code_typed(args...; kwargs...)))::Core.CodeInfo).code @testset "@inline/@noinline annotation before definition" begin m = Module() @@ -414,24 +414,24 @@ code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...) 
def_noinline_noconflict(x) = _def_noinline_noconflict(x) end - let ci = code_typed1(m.def_inline, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.def_inline, (Int,)) + @test all(code) do x !isinvoke(x, :_def_inline) end end - let ci = code_typed1(m.def_noinline, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.def_noinline, (Int,)) + @test any(code) do x isinvoke(x, :_def_noinline) end end # test that they don't conflict with other "before-definition" macros - let ci = code_typed1(m.def_inline_noconflict, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.def_inline_noconflict, (Int,)) + @test all(code) do x !isinvoke(x, :_def_inline_noconflict) end end - let ci = code_typed1(m.def_noinline_noconflict, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.def_noinline_noconflict, (Int,)) + @test any(code) do x isinvoke(x, :_def_noinline_noconflict) end end @@ -471,36 +471,149 @@ end end end - let ci = code_typed1(m.body_inline, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.body_inline, (Int,)) + @test all(code) do x !isinvoke(x, :_body_inline) end end - let ci = code_typed1(m.body_noinline, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.body_noinline, (Int,)) + @test any(code) do x isinvoke(x, :_body_noinline) end end # test annotations for `do` blocks - let ci = code_typed1(m.do_inline, (Int,)) + let code = code_typed1(m.do_inline, (Int,)) # what we test here is that both `simple_caller` and the anonymous function that the # `do` block creates should inlined away, and as a result there is only the unresolved call - @test all(ci.code) do x + @test all(code) do x !isinvoke(x, :simple_caller) && !isinvoke(x, mi->startswith(string(mi.def.name), '#')) end end - let ci = code_typed1(m.do_noinline, (Int,)) + let code = code_typed1(m.do_noinline, (Int,)) # the anonymous function that the `do` block created shouldn't be inlined here - @test any(ci.code) do x + @test any(code) do x isinvoke(x, 
mi->startswith(string(mi.def.name), '#')) end end end +@testset "callsite @inline/@noinline annotations" begin + m = Module() + @eval m begin + # this global variable prevents inference to fold everything as constant, and/or the optimizer to inline the call accessing to this + g = 0 + + @noinline noinlined_explicit(x) = x + force_inline_explicit(x) = @inline noinlined_explicit(x) + force_inline_block_explicit(x) = @inline noinlined_explicit(x) + noinlined_explicit(x) + noinlined_implicit(x) = g + force_inline_implicit(x) = @inline noinlined_implicit(x) + force_inline_block_implicit(x) = @inline noinlined_implicit(x) + noinlined_implicit(x) + + @inline inlined_explicit(x) = x + force_noinline_explicit(x) = @noinline inlined_explicit(x) + force_noinline_block_explicit(x) = @noinline inlined_explicit(x) + inlined_explicit(x) + inlined_implicit(x) = x + force_noinline_implicit(x) = @noinline inlined_implicit(x) + force_noinline_block_implicit(x) = @noinline inlined_implicit(x) + inlined_implicit(x) + + # test callsite annotations for constant-prop'ed calls + + @noinline Base.@aggressive_constprop noinlined_constprop_explicit(a) = a+g + force_inline_constprop_explicit() = @inline noinlined_constprop_explicit(0) + Base.@aggressive_constprop noinlined_constprop_implicit(a) = a+g + force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0) + + @inline Base.@aggressive_constprop inlined_constprop_explicit(a) = a+g + force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0) + @inline Base.@aggressive_constprop inlined_constprop_implicit(a) = a+g + force_noinline_constprop_implicit() = @noinline inlined_constprop_implicit(0) + + @noinline notinlined(a) = a + function nested(a0, b0) + @noinline begin + a = @inline notinlined(a0) # this call should be inlined + b = notinlined(b0) # this call should NOT be inlined + return a, b + end + end + + # test inlining of un-cached callsites + + import Core.Compiler: isType + + limited(a) = 
@noinline(isType(a)) ? @inline(limited(a.parameters[1])) : rand(a) + + function multilimited(a) + if @noinline(isType(a)) + return @inline(multilimited(a.parameters[1])) + else + return rand(Bool) ? rand(a) : @inline(multilimited(a)) + end + end + end + + let code = code_typed1(m.force_inline_explicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_explicit), code) + end + let code = code_typed1(m.force_inline_block_explicit, (Int,)) + @test all(code) do x + !isinvoke(x, :noinlined_explicit) && + !isinvoke(x, :(+)) + end + end + let code = code_typed1(m.force_inline_implicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_implicit), code) + end + let code = code_typed1(m.force_inline_block_implicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_implicit), code) + end + + let code = code_typed1(m.force_noinline_explicit, (Int,)) + @test any(x->isinvoke(x, :inlined_explicit), code) + end + let code = code_typed1(m.force_noinline_block_explicit, (Int,)) + @test count(x->isinvoke(x, :inlined_explicit), code) == 2 + end + let code = code_typed1(m.force_noinline_implicit, (Int,)) + @test any(x->isinvoke(x, :inlined_implicit), code) + end + let code = code_typed1(m.force_noinline_block_implicit, (Int,)) + @test count(x->isinvoke(x, :inlined_implicit), code) == 2 + end + + let code = code_typed1(m.force_inline_constprop_explicit) + @test all(x->!isinvoke(x, :noinlined_constprop_explicit), code) + end + let code = code_typed1(m.force_inline_constprop_implicit) + @test all(x->!isinvoke(x, :noinlined_constprop_implicit), code) + end + + let code = code_typed1(m.force_noinline_constprop_explicit) + @test any(x->isinvoke(x, :inlined_constprop_explicit), code) + end + let code = code_typed1(m.force_noinline_constprop_implicit) + @test any(x->isinvoke(x, :inlined_constprop_implicit), code) + end + + let code = code_typed1(m.nested, (Int,Int)) + @test count(x->isinvoke(x, :notinlined), code) == 1 + end + + let code = code_typed1(m.limited, (Any,)) + @test count(x->isinvoke(x, 
:isType), code) == 2 + end + # check that inlining for recursive callsites doesn't depend on inference local cache + let code1 = code_typed1(m.multilimited, (Any,)) + code2 = code_typed1(m.multilimited, (Any,)) + @test code1 == code2 + end +end + # force constant-prop' for `setproperty!` -let m = Module() - ci = @eval m begin +# https://github.com/JuliaLang/julia/pull/41882 +let ci = @eval Module() begin # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to # `Union{Type{Any},Type{Int}` and it will make `convert(T, nothing)` too costly # and it leads to inlining failure