Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimizer: supports callsite annotations of inlining, fixes #18773 #41328

Merged
merged 11 commits into from
Sep 1, 2021
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ New language features
---------------------

* `Module(:name, false, false)` can be used to create a `module` that does not import `Core`. ([#40110])
* `@inline` and `@noinline` annotations may now be used in function bodies. ([#41312])
* `@inline` and `@noinline` annotations can be used within a function body to give an extra
hint about the inlining cost to the compiler. ([#41312])
* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
to enforce the involved function calls to be (or not to be) inlined. ([#41312])
* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])

Language changes
Expand Down Expand Up @@ -39,7 +42,7 @@ New library features

* `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text
contains "some message" regardless of the specific exception type.
Regular expressions, lists of strings, and matching functions are also supported. ([#41888)
Regular expressions, lists of strings, and matching functions are also supported. ([#41888])

Standard library changes
------------------------
Expand Down
12 changes: 8 additions & 4 deletions base/compiler/abstractinterpretation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me
return nothing
end
mi = mi::MethodInstance
if !force && !const_prop_methodinstance_heuristic(interp, method, mi)
if !force && !const_prop_methodinstance_heuristic(interp, match, mi, sv)
add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
return nothing
end
Expand Down Expand Up @@ -696,7 +696,8 @@ end
# This is a heuristic to avoid trying to const prop through complicated functions
# where we would spend a lot of time, but are probably unlikely to get an improved
# result anyway.
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance, sv::InferenceState)
method = match.method
if method.is_for_opaque_closure
# Not inlining an opaque closure can be very expensive, so be generous
# with the const-prop-ability. It is quite possible that we can't infer
Expand All @@ -714,7 +715,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method
if isdefined(code, :inferred) && !cache_inlineable
cache_inf = code.inferred
if !(cache_inf === nothing)
cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing
src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv))
cache_inlineable = src !== nothing
end
end
if !cache_inlineable
Expand Down Expand Up @@ -1908,7 +1910,9 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
if isa(fname, SlotNumber)
changes = StateUpdate(fname, VarState(Any, false), changes, false)
end
elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
elseif hd === :code_coverage_effect ||
(hd !== :boundscheck && # :boundscheck can be narrowed to Bool
hd !== nothing && is_meta_expr_head(hd))
# these do not generate code
else
t = abstract_eval_statement(interp, stmt, changes, frame)
Expand Down
2 changes: 2 additions & 0 deletions base/compiler/inferencestate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,5 @@ function print_callstack(sv::InferenceState)
sv = sv.parent
end
end

# Return the entry of `sv.src.ssaflags` at index `sv.currpc`, i.e. the flag byte
# stored for the statement that `sv` currently points at.
function get_curr_ssaflag(sv::InferenceState)
    return sv.src.ssaflags[sv.currpc]
end
72 changes: 33 additions & 39 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,28 @@ function push!(et::EdgeTracker, ci::CodeInstance)
push!(et, ci.def)
end

struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P}
struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
params::OptimizationParams
et::S
mi_cache::T
policy::P
interp::I
end

function default_inlining_policy(@nospecialize(src))
function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8)
if isa(src, CodeInfo) || isa(src, Vector{UInt8})
src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
return src_inferred && src_inlineable ? src : nothing
elseif isa(src, OptimizationState) && isdefined(src, :ir)
return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to check that src is inferred and optimized here before using it?

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually NativeInterpreter won't hit this pass, because finish! transforms OptimizationState into CodeInfo.
AFAIU this is basically external interpreter stuff, and I don't think we need more checks here because (opt::OptimizationState).ir is only introduced when inference is successful and the source is optimized ?
@Keno might have another idea on this though.

else
# maybe we want to make inference keep the source in a local cache if a statement is going to be inlined
# and re-optimize it here with further inlining disabled to avoid an infinite optimization loop
# (we can even naively try to re-infer it entirely)
# but it seems that such "single-level-inlining" is more trouble and complexity than it's worth
# see https://github.com/JuliaLang/julia/pull/41328/commits/0fc0f71a42b8c9d04b0dafabf3f1f17703abf2e7
return nothing
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized that although it feels like this will make the optimization unstable and unpredictable, we actually already hit this problem in precompile/codegen/optimization for existing code. So we are not introducing a new problem here. Perhaps making the current situation worse in this case, but feels probably acceptable, since it may need to be fixed eventually anyways.

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to fix this in another PR if I (or someone) find any value in it.

In the meantime, I think it would be appropriate to document this limitation in the docstring.
How about adding this warning to the docstring of the @inline block?

!!! warning
    Although a callsite annotation will force inlining regardless of the cost model,
    there are still cases where inlining can't succeed.
    In particular, it can't inline recursive calls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't emitting a real compiler warning in case of failure a possibility? (For call site inline annotations I find "force" semantics more intuitive than "wish". And when forcing something impossible, it is better to know that it failed.)

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible, but I'm afraid of spamming users with such warnings (I think they're really not of interest from the user's point of view):

module Pkg
recursivef(...) = ...
function userfacingf(...)
  @inline recursivef(...)
end
export userfacingf
end

userfacingf(...) # we want warning ... ?

We can have it as opt-in feature, but that could be addressed in another PR.
Or, we can create new JET analyzer to report such cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning would be printed during module development, so the module developer can fix it before releasing the code. I find the warning appropriate in your example.

It is possible to imagine more complex examples, e.g. callback-like scenarios, but if the module developer thinks inlining of user-specified functions is mandatory even against the policy, then it seems ok for them to have their users warned that they should not provide recursive callbacks.

I think the root problem is that now the macro has different semantics based on where it is used.

A solution may be to rename this to @force-inline. If the warning is annoying, then a callsite @inline can be added later that does not bypass the cost model and thus does not emit a warning. That would be a much more coherent system in my view.

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, let's leave this to another PR.
Currently we don't have any agreement on how we want the compiler to emit warnings. E.g. it doesn't report anything back to us even if it finds obvious type errors during inference, but we could argue that it's more appropriate to get some warnings.

So I don't want this discussion to hold this PR.
If we adopt @force_inline naming, it should be done here before releasing it to nightly, but I'm afraid to accept it.
Given that there is no way to "force" inlining in the presence of recursive calls, the essential difference between @force_inline and @inline is whether they emit the warning or not. And having two different names that do the same thing except for the warning is more confusing than beneficial, imho.

end
if isa(src, OptimizationState) && isdefined(src, :ir)
return src.src.inlineable ? src.ir : nothing
end
return nothing
end

include("compiler/ssair/driver.jl")
Expand All @@ -57,7 +62,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
EdgeTracker(s_edges, frame.valid_worlds),
WorldView(code_cache(interp), frame.world),
inlining_policy(interp))
interp)
return new(frame.linfo,
frame.src, nothing, frame.stmt_info, frame.mod,
frame.sptypes, frame.slottypes, false,
Expand Down Expand Up @@ -86,7 +91,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
nothing,
WorldView(code_cache(interp), get_world_counter()),
inlining_policy(interp))
interp)
return new(linfo,
src, nothing, stmt_info, mod,
sptypes_from_meth_instance(linfo), slottypes, false,
Expand Down Expand Up @@ -125,9 +130,15 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
# const SLOT_CALLED = 64

# This statement was marked as @inbounds by the user. If replaced by inlining,
# any contained boundschecks may be removed
const IR_FLAG_INBOUNDS = 0x01
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c

# This statement is marked as @inbounds by user.
# If replaced by inlining, any contained boundschecks may be removed.
const IR_FLAG_INBOUNDS = 0x01 << 0
# This statement is marked as @inline by user
const IR_FLAG_INLINE = 0x01 << 1
# This statement is marked as @noinline by user
const IR_FLAG_NOINLINE = 0x01 << 2
# This statement may be removed if its result is unused. In particular it must
# thus be both pure and effect free.
const IR_FLAG_EFFECT_FREE = 0x01 << 4
Expand Down Expand Up @@ -173,6 +184,9 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara
return inlineable
end

# Report whether the `IR_FLAG_INLINE` bit is set in `stmt_flag`.
function is_stmt_inline(stmt_flag::UInt8)
    return (stmt_flag & IR_FLAG_INLINE) != 0
end

# Report whether the `IR_FLAG_NOINLINE` bit is set in `stmt_flag`.
function is_stmt_noinline(stmt_flag::UInt8)
    return (stmt_flag & IR_FLAG_NOINLINE) != 0
end

# These affect control flow within the function (so may not be removed
# if there is no usage within the function), but don't affect the purity
# of the function as a whole.
Expand Down Expand Up @@ -358,42 +372,22 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::
end
renumber_ir_elements!(code, changemap, labelmap)

inbounds_depth = 0 # Number of stacked inbounds
meta = Any[]
flags = fill(0x00, length(code))
for i = 1:length(code)
stmt = code[i]
if isexpr(stmt, :inbounds)
arg1 = stmt.args[1]
if arg1 === true # push
inbounds_depth += 1
elseif arg1 === false # clear
inbounds_depth = 0
elseif inbounds_depth > 0 # pop
inbounds_depth -= 1
end
stmt = nothing
else
stmt = normalize(stmt, meta)
end
code[i] = stmt
if !(stmt === nothing)
if inbounds_depth > 0
flags[i] |= IR_FLAG_INBOUNDS
end
end
code[i] = remove_meta!(code[i], meta)
end
aviatesk marked this conversation as resolved.
Show resolved Hide resolved
strip_trailing_junk!(ci, code, stmtinfo, flags)
strip_trailing_junk!(ci, code, stmtinfo)
cfg = compute_basic_blocks(code)
types = Any[]
stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags)
stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, ci.ssaflags)
ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes)
return ir
end

function normalize(@nospecialize(stmt), meta::Vector{Any})
function remove_meta!(@nospecialize(stmt), meta::Vector{Any})
if isa(stmt, Expr)
if stmt.head === :meta
head = stmt.head
if head === :meta
args = stmt.args
if length(args) > 0
push!(meta, stmt)
Expand Down
Loading