From bde62adff7dd98fefc14482da92713484a748846 Mon Sep 17 00:00:00 2001
From: Jameson Nash
Date: Wed, 25 Oct 2023 17:20:16 -0400
Subject: [PATCH] fix for invalid caching of CodeInfo from typeinf_ext (#51872)

When inference decided it was not necessary to cache the object, it also
skipped all of the work required to make the code valid, which typeinf_ext
depends upon. This resulted in caching invalid data, causing effects tests
to break unpredictably.

This change ensures that we always update the InferenceState with the final
result (when `must_be_codeinf` is true), so that typeinf_ext can get the
correct results out of it for internal codegen use. Previously we were
disregarding that flag in some cases.

Fixes one of the issues uncovered in #51860
---
 base/compiler/typeinfer.jl | 61 ++++++++++++++++----------------
 src/aotcompile.cpp         |  2 +-
 src/gf.c                   | 10 +++++++
 src/jitlayers.cpp          |  3 +-
 src/jl_exported_funcs.inc  |  1 -
 src/julia_internal.h       |  2 ++
 6 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index f246289b5bea0..cebf178504a68 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -218,28 +218,28 @@ function typeinf(interp::NativeInterpreter, frame::InferenceState)
 end
 typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame)
 
-function finish!(interp::AbstractInterpreter, caller::InferenceResult)
-    # If we didn't transform the src for caching, we may have to transform
-    # it anyway for users like typeinf_ext. Do that here.
-    opt = caller.src
-    if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true`
-        if opt.ir !== nothing
-            if caller.must_be_codeinf
-                caller.src = ir_to_codeinf!(opt)
-            elseif is_inlineable(opt.src)
-                # TODO: If the CFG is too big, inlining becomes more expensive and if we're going to
-                # use this IR over and over, it's worth simplifying it. Round trips through
-                # CodeInstance do this implicitly, since they recompute the CFG, so try to
-                # match that behavior here.
-                # ir = cfg_simplify!(opt.ir)
-                caller.src = opt.ir
-            else
-                # Not cached and not inlineable - drop the ir
-                caller.src = nothing
-            end
-        end
+function finish!(interp::AbstractInterpreter, caller::InferenceState)
+    result = caller.result
+    valid_worlds = result.valid_worlds
+    if last(valid_worlds) >= get_world_counter()
+        # if we aren't cached, we don't need this edge
+        # but our caller might, so let's just make it anyways
+        store_backedges(result, caller.stmt_edges[1])
+    end
+    opt = result.src
+    if opt isa OptimizationState && result.must_be_codeinf
+        result.src = opt = ir_to_codeinf!(opt)
     end
-    return caller.src
+    if opt isa CodeInfo
+        opt.min_world = first(valid_worlds)
+        opt.max_world = last(valid_worlds)
+        caller.src = opt
+    else
+        # In this case caller.src is invalid for clients (such as typeinf_ext) to use
+        # but that is what !must_be_codeinf permits
+        # This is hopefully unreachable when must_be_codeinf is true
+    end
+    return
 end
 
 function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
@@ -266,17 +266,12 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
         end
     end
     for caller in frames
-        (; result ) = caller
-        valid_worlds = result.valid_worlds
-        if last(valid_worlds) >= get_world_counter()
-            # if we aren't cached, we don't need this edge
-            # but our caller might, so let's just make it anyways
-            store_backedges(result, caller.stmt_edges[1])
-        end
+        finish!(caller.interp, caller)
         if caller.cached
-            cache_result!(caller.interp, result)
+            cache_result!(caller.interp, caller.result)
         end
-        finish!(caller.interp, result)
+        # n.b. We do not drop result.src here, even though that wastes memory while it is still in the local cache
+        # since the user might have requested call-site inlining of it.
     end
     empty!(frames)
     return true
@@ -367,13 +362,7 @@ end
 function transform_result_for_cache(interp::AbstractInterpreter,
     linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult)
     inferred_result = result.src
-    if inferred_result isa OptimizationState{typeof(interp)}
-        # TODO respect must_be_codeinf setting here?
-        result.src = inferred_result = ir_to_codeinf!(inferred_result)
-    end
     if inferred_result isa CodeInfo
-        inferred_result.min_world = first(valid_worlds)
-        inferred_result.max_world = last(valid_worlds)
         inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result)
     end
     # The global cache can only handle objects that codegen understands
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 6f55319e60b7d..1a7c7e0f5b2f7 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -246,7 +246,7 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
     else {
         *src_out = jl_type_infer(mi, world, 0);
         if (*src_out) {
-            codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world);
+            codeinst = jl_get_codeinst_for_src(mi, *src_out);
             if ((*src_out)->inferred) {
                 jl_value_t *null = nullptr;
                 jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing);
diff --git a/src/gf.c b/src/gf.c
index 407d9c517a55b..3caaa71924864 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -472,6 +472,16 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
     return codeinst;
 }
 
+JL_DLLEXPORT jl_code_instance_t *jl_get_codeinst_for_src(
+        jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_code_info_t *src)
+{
+    // TODO: copy backedges from src to mi
+    size_t max_world = src->max_world;
+    if (max_world >= jl_atomic_load_acquire(&jl_world_counter))
+        max_world = ~(size_t)0;
+    return jl_get_method_inferred(mi, src->rettype, src->min_world, max_world);
+}
+
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *rettype,
         jl_value_t *inferred_const, jl_value_t *inferred,
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 616ec05d4c362..fbc4b35310797 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -507,6 +507,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
             // see if it is inferred, or try to infer it for ourself.
             // (but don't bother with typeinf on macros or toplevel thunks)
             src = jl_type_infer(mi, world, 0);
+            codeinst = nullptr;
         }
     }
     jl_code_instance_t *compiled = jl_method_compiled(mi, world);
@@ -515,7 +516,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
     }
     else if (src && jl_is_code_info(src)) {
         if (!codeinst) {
-            codeinst = jl_get_method_inferred(mi, src->rettype, src->min_world, src->max_world);
+            codeinst = jl_get_codeinst_for_src(mi, src);
             if (src->inferred) {
                 jl_value_t *null = nullptr;
                 jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing);
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index c503eeb4407b3..4ac04d6d0fccd 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -226,7 +226,6 @@
     XX(jl_get_JIT) \
     XX(jl_get_julia_bin) \
     XX(jl_get_julia_bindir) \
-    XX(jl_get_method_inferred) \
     XX(jl_get_module_compile) \
     XX(jl_get_module_infer) \
     XX(jl_get_module_of_binding) \
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 4f326216d8daf..0e1a02b03ec28 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -635,6 +635,8 @@ JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
         size_t min_world, size_t max_world);
+JL_DLLEXPORT jl_code_instance_t *jl_get_codeinst_for_src(
+        jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_code_info_t *src);
 jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT);
 jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
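
As a rough illustration of the invariant this patch restores for typeinf_ext-style
consumers: after `typeinf` runs, the frame's `src` is a world-bounded CodeInfo even
when nothing was written to the global cache. The sketch below is not part of the
patch; it assumes Core.Compiler internals roughly as of this branch (the
`InferenceResult`/`InferenceState`/`typeinf` names and the `:global` cache-mode
Symbol are taken from that code and may differ on other versions), and the helper
name `infer_for_codegen` is hypothetical.

    using Core: MethodInstance, CodeInfo
    using Core.Compiler: NativeInterpreter, InferenceResult, InferenceState, typeinf

    # Hypothetical helper (not in the patch): infer `mi` and hand back a CodeInfo
    # suitable for codegen, relying on finish! having stamped min_world/max_world.
    function infer_for_codegen(mi::MethodInstance, world::UInt)
        interp = NativeInterpreter(world)
        frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp)
        frame === nothing && return nothing   # no source available to infer
        typeinf(interp, frame)
        src = frame.src
        # With this fix, finish! has already converted any OptimizationState back
        # into a CodeInfo and set its min_world/max_world, even if cache_result!
        # was never called for this frame.
        return src isa CodeInfo ? src : nothing
    end

A caller would pass the current world, e.g. `infer_for_codegen(mi, Base.get_world_counter())`.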