From f4dc5c1cc002da19f4c2f3ed0850330cb905281f Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Mon, 30 Oct 2023 17:14:58 +0900
Subject: [PATCH] inference: don't put globally-cached results into inference
 local cache (#51888)

Now, we fill the inference's local cache exclusively with locally-cached
results. Given that sources of globally-cached results will be populated
from the global cache when needed (for inlining), there's no need for
them to waste memory in the local cache.

@nanosoldier `runbenchmarks("inference", vs=":master")`
---
 base/compiler/abstractinterpretation.jl |  2 +-
 base/compiler/inferencestate.jl         | 15 +++----
 base/compiler/typeinfer.jl              | 58 +++++++++++++------------
 stdlib/REPL/src/REPLCompletions.jl      |  4 +-
 test/compiler/AbstractInterpreter.jl    |  6 +--
 5 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index d3d7d82a1e1505..71decf1bb29063 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1181,7 +1181,7 @@ function const_prop_call(interp::AbstractInterpreter,
             add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
             return nothing
         end
-        frame = InferenceState(inf_result, #=cache=#:local, interp)
+        frame = InferenceState(inf_result, #=cache_mode=#:local, interp)
         if frame === nothing
             add_remark!(interp, sv, "[constprop] Could not retrieve the source")
             return nothing # this is probably a bad generated function (unsound), but just ignore it
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 6a6aba10764498..8eb86f48351ed3 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -240,7 +240,7 @@ mutable struct InferenceState
     # Whether to restrict inference of abstract call sites to avoid excessive work
     # Set by default for toplevel frame.
     restrict_abstract_call_sites::Bool
-    cached::Bool # TODO move this to InferenceResult?
+    cache_mode::Symbol # TODO move this to InferenceResult?
     insert_coverage::Bool
 
     # The interpreter that created this inference state. Not looked at by
@@ -248,7 +248,7 @@ mutable struct InferenceState
     interp::AbstractInterpreter
 
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
-    function InferenceState(result::InferenceResult, src::CodeInfo, cache::Symbol,
+    function InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::Symbol,
                             interp::AbstractInterpreter)
         linfo = result.linfo
         world = get_world_counter(interp)
@@ -303,19 +303,18 @@ mutable struct InferenceState
         end
 
         restrict_abstract_call_sites = isa(def, Module)
-        @assert cache === :no || cache === :local || cache === :global
-        cached = cache === :global
+        @assert cache_mode === :no || cache_mode === :local || cache_mode === :global
 
         # some more setups
         InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
-        cache !== :no && push!(get_inference_cache(interp), result)
+        cache_mode === :local && push!(get_inference_cache(interp), result)
 
         return new(
             linfo, world, mod, sptypes, slottypes, src, cfg, method_info,
             currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
             pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent,
             result, unreachable, valid_worlds, bestguess, ipo_effects,
-            restrict_abstract_call_sites, cached, insert_coverage,
+            restrict_abstract_call_sites, cache_mode, insert_coverage,
             interp)
     end
 end
@@ -667,7 +666,7 @@ end
 function print_callstack(sv::InferenceState)
     while sv !== nothing
         print(sv.linfo)
-        !sv.cached && print("  [uncached]")
+        sv.cache_mode === :global || print("  [uncached]")
         println()
         for cycle in sv.callers_in_cycle
             print(' ', cycle.linfo)
@@ -765,7 +764,7 @@ frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState}
 is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const)
 is_constproped(::IRInterpretationState) = true
 
-is_cached(sv::InferenceState) = sv.cached
+is_cached(sv::InferenceState) = sv.cache_mode === :global
 is_cached(::IRInterpretationState) = false
 
 method_info(sv::InferenceState) = sv.method_info
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index cebf178504a688..d1f635d3be704d 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -5,10 +5,10 @@ const track_newly_inferred = RefValue{Bool}(false)
 const newly_inferred = CodeInstance[]
 
 # build (and start inferring) the inference frame for the top-level MethodInstance
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
-    frame = InferenceState(result, cache, interp)
+function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache_mode::Symbol)
+    frame = InferenceState(result, cache_mode, interp)
     frame === nothing && return false
-    cache === :global && lock_mi_inference(interp, result.linfo)
+    cache_mode === :global && lock_mi_inference(interp, result.linfo)
     return typeinf(interp, frame)
 end
 
@@ -239,7 +239,7 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState)
         # but that is what !must_be_codeinf permits
         # This is hopefully unreachable when must_be_codeinf is true
     end
-    return
+    return nothing
 end
 
 function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
@@ -267,11 +267,14 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     end
     for caller in frames
         finish!(caller.interp, caller)
-        if caller.cached
+        if caller.cache_mode === :global
             cache_result!(caller.interp, caller.result)
         end
-        # n.b. We do not drop result.src here, even though that wastes memory while it is still in the local cache
-        # since the user might have requested call-site inlining of it.
+        # Drop result.src here since otherwise it can waste memory.
+        # N.B. If the `cache_mode === :local`, the inliner may request to use it later.
+        if caller.cache_mode !== :local
+            caller.result.src = nothing
+        end
     end
     empty!(frames)
     return true
@@ -381,23 +384,23 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
     end
     # check if the existing linfo metadata is also sufficient to describe the current inference result
     # to decide if it is worth caching this
-    linfo = result.linfo
-    already_inferred = already_inferred_quick_test(interp, linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
+    mi = result.linfo
+    already_inferred = already_inferred_quick_test(interp, mi)
+    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), mi)
         already_inferred = true
     end
 
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
-        code_cache(interp)[linfo] = ci = CodeInstance(interp, result, valid_worlds)
+        code_cache(interp)[mi] = ci = CodeInstance(interp, result, valid_worlds)
         if track_newly_inferred[]
-            m = linfo.def
+            m = mi.def
             if isa(m, Method) && m.module != Core
                 ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
             end
         end
     end
-    unlock_mi_inference(interp, linfo)
+    unlock_mi_inference(interp, mi)
     nothing
 end
 
@@ -543,7 +546,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         # a parent may be cached still, but not this intermediate work:
         # we can throw everything else away now
         me.result.src = nothing
-        me.cached = false
+        me.cache_mode = :no
         set_inlineable!(me.src, false)
         unlock_mi_inference(interp, me.linfo)
     elseif limited_src
@@ -555,7 +558,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         # annotate fulltree with type information,
         # either because we are the outermost code, or we might use this later
         type_annotate!(interp, me)
-        doopt = (me.cached || me.parent !== nothing)
+        doopt = (me.cache_mode !== :no || me.parent !== nothing)
         # Disable the optimizer if we've already determined that there's nothing for
         # it to do.
         if may_discard_trees(interp) && is_result_constabi_eligible(me.result)
@@ -563,8 +566,6 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         end
         if doopt && may_optimize(interp)
             me.result.src = OptimizationState(me, interp)
-        else
-            me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
         end
     end
     validate_code_in_debug_mode(me.linfo, me.src, "inferred")
@@ -814,7 +815,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
             # we already inferred this edge before and decided to discard the inferred code,
             # nevertheless we re-infer it here again and keep it around in the local cache
             # since the inliner will request to use it later
-            cache = :local
+            cache_mode = :local
         else
             rt = cached_return_type(code)
             effects = ipo_effects(code)
@@ -822,7 +823,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
             return EdgeCallResult(rt, mi, effects)
         end
     else
-        cache = :global # cache edge targets by default
+        cache_mode = :global # cache edge targets by default
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false)
         add_remark!(interp, caller, "Inference is disabled for the target module")
@@ -839,7 +840,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         # completely new
         lock_mi_inference(interp, mi)
         result = InferenceResult(mi, typeinf_lattice(interp))
-        frame = InferenceState(result, cache, interp) # always use the cache for edge targets
+        frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets
         if frame === nothing
             add_remark!(interp, caller, "Failed to retrieve source")
             # can't get the source for this, so we know nothing
@@ -970,7 +971,8 @@ typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype),
 function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
     start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     result = InferenceResult(mi, typeinf_lattice(interp))
-    frame = InferenceState(result, run_optimizer ? :global : :no, interp)
+    cache_mode = run_optimizer ? :global : :no
+    frame = InferenceState(result, cache_mode, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
@@ -1010,7 +1012,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
     end
     lock_mi_inference(interp, mi)
     result = InferenceResult(mi, typeinf_lattice(interp))
-    frame = InferenceState(result, #=cache=#:global, interp)
+    frame = InferenceState(result, #=cache_mode=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
@@ -1043,18 +1045,18 @@ end
 
 # This is a bridge for the C code calling `jl_typeinf_func()`
 typeinf_ext_toplevel(mi::MethodInstance, world::UInt) = typeinf_ext_toplevel(NativeInterpreter(world), mi)
-function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance)
-    if isa(linfo.def, Method)
+function typeinf_ext_toplevel(interp::AbstractInterpreter, mi::MethodInstance)
+    if isa(mi.def, Method)
         # method lambda - infer this specialization via the method cache
-        src = typeinf_ext(interp, linfo)
+        src = typeinf_ext(interp, mi)
     else
-        src = linfo.uninferred::CodeInfo
+        src = mi.uninferred::CodeInfo
         if !src.inferred
             # toplevel lambda - infer directly
             start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
             if !src.inferred
-                result = InferenceResult(linfo, typeinf_lattice(interp))
-                frame = InferenceState(result, src, #=cache=#:global, interp)
+                result = InferenceResult(mi, typeinf_lattice(interp))
+                frame = InferenceState(result, src, #=cache_mode=#:global, interp)
                 typeinf(interp, frame)
                 @assert is_inferred(frame) # TODO: deal with this better
                 src = frame.src
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index cb9dc292d96649..d7f5a7cc0c23f4 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -519,10 +519,10 @@ CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.Infer
 # `REPLInterpreter` is specifically used by `repl_eval_ex`, where all top-level frames are
 # `repl_frame` always. However, this assumption wouldn't stand if `REPLInterpreter` were to
 # be employed, for instance, by `typeinf_ext_toplevel`.
-is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && !sv.cached
+is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode === :no
 
 function is_call_graph_uncached(sv::CC.InferenceState)
-    sv.cached && return false
+    sv.cache_mode === :global && return false
     parent = sv.parent
     parent === nothing && return true
     return is_call_graph_uncached(parent::CC.InferenceState)
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index 2d77c148275b92..083071b566da62 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -437,18 +437,14 @@ custom_lookup_context(x::Int) = custom_lookup_target(true, x)
 const CONST_INVOKE_INTERP_WORLD = Base.get_world_counter()
 const CONST_INVOKE_INTERP = ConstInvokeInterp(; world=CONST_INVOKE_INTERP_WORLD)
 function custom_lookup(mi::MethodInstance, min_world::UInt, max_world::UInt)
-    local matched_mi = nothing
     for inf_result in CONST_INVOKE_INTERP.inf_cache
         if inf_result.linfo === mi
             if CC.any(inf_result.overridden_by_const)
                 return CodeInstance(CONST_INVOKE_INTERP, inf_result, inf_result.valid_worlds)
-            elseif matched_mi === nothing
-                matched_mi = inf_result.linfo
             end
         end
     end
-    matched_mi === nothing && return nothing
-    return CONST_INVOKE_INTERP.code_cache.dict[matched_mi]
+    return CONST_INVOKE_INTERP.code_cache.dict[mi]
 end
 
 let # generate cache