GH-109369: Add machinery for deoptimizing tier2 executors, both indiv…

…idually and globally. (GH-110384)
python · Oct 23, 2023 · 52e902c · 52e902c
1 parent 32c37fe
commit 52e902c
Show file tree

Hide file tree

Showing 7 changed files with 353 additions and 2 deletions.
diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
@@ -6,9 +6,27 @@
 extern "C" {
 #endif
 
+typedef struct _PyExecutorLinkListNode {
+    struct _PyExecutorObject *next;
+    struct _PyExecutorObject *previous;
+} _PyExecutorLinkListNode;
+
+
+/* Bloom filter with m = 256
+ * https://en.wikipedia.org/wiki/Bloom_filter */
+#define BLOOM_FILTER_WORDS 8
+
+typedef struct _bloom_filter {
+    uint32_t bits[BLOOM_FILTER_WORDS];
+} _PyBloomFilter;
+
 typedef struct {
     uint8_t opcode;
     uint8_t oparg;
+    uint8_t valid;
+    uint8_t linked;
+    _PyBloomFilter bloom;
+    _PyExecutorLinkListNode links;
 } _PyVMData;
 
 typedef struct _PyExecutorObject {
@@ -45,6 +63,14 @@ _PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_
 
 extern _PyOptimizerObject _PyOptimizer_Default;
 
+void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
+void _Py_ExecutorClear(_PyExecutorObject *);
+void _Py_BloomFilter_Init(_PyBloomFilter *);
+void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
+PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
+PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj);
+extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp);
+
 /* For testing */
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);

diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
@@ -215,6 +215,7 @@ struct _is {
     struct types_state types;
     struct callable_cache callable_cache;
     _PyOptimizerObject *optimizer;
+    _PyExecutorObject *executor_list_head;
     uint16_t optimizer_resume_threshold;
     uint16_t optimizer_backedge_threshold;
     uint32_t next_func_version;

diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
@@ -2489,6 +2489,67 @@ def get_first_executor(func):
     return None
 
 
+class TestExecutorInvalidation(unittest.TestCase):
+
+    def setUp(self):
+        self.old = _testinternalcapi.get_optimizer()
+        self.opt = _testinternalcapi.get_counter_optimizer()
+        _testinternalcapi.set_optimizer(self.opt)
+
+    def tearDown(self):
+        _testinternalcapi.set_optimizer(self.old)
+
+    def test_invalidate_object(self):
+        # Generate a new set of functions at each call
+        ns = {}
+        func_src = "\n".join(
+            f"""
+            def f{n}():
+                for _ in range(1000):
+                    pass
+            """ for n in range(5)
+        )
+        exec(textwrap.dedent(func_src), ns, ns)
+        funcs = [ ns[f'f{n}'] for n in range(5)]
+        objects = [object() for _ in range(5)]
+
+        for f in funcs:
+            f()
+        executors = [get_first_executor(f) for f in funcs]
+        # Set things up so each executor depends on the objects
+        # with an equal or lower index.
+        for i, exe in enumerate(executors):
+            self.assertTrue(exe.valid)
+            for obj in objects[:i+1]:
+                _testinternalcapi.add_executor_dependency(exe, obj)
+            self.assertTrue(exe.valid)
+        # Assert that the correct executors are invalidated
+        # and check that nothing crashes when we invalidate
+        # an executor mutliple times.
+        for i in (4,3,2,1,0):
+            _testinternalcapi.invalidate_executors(objects[i])
+            for exe in executors[i:]:
+                self.assertFalse(exe.valid)
+            for exe in executors[:i]:
+                self.assertTrue(exe.valid)
+
+    def test_uop_optimizer_invalidation(self):
+        # Generate a new function at each call
+        ns = {}
+        exec(textwrap.dedent("""
+            def f():
+                for i in range(1000):
+                    pass
+        """), ns, ns)
+        f = ns['f']
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            f()
+        exe = get_first_executor(f)
+        self.assertTrue(exe.valid)
+        _testinternalcapi.invalidate_executors(f.__code__)
+        self.assertFalse(exe.valid)
+
 class TestUops(unittest.TestCase):
 
     def test_basic_loop(self):

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
@@ -1002,6 +1002,32 @@ get_executor(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
     return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, ioffset);
 }
 
+static PyObject *
+add_executor_dependency(PyObject *self, PyObject *args)
+{
+    PyObject *exec;
+    PyObject *obj;
+    if (!PyArg_ParseTuple(args, "OO", &exec, &obj)) {
+        return NULL;
+    }
+    /* No way to tell in general if exec is an executor, so we only accept
+     * counting_executor */
+    if (strcmp(Py_TYPE(exec)->tp_name, "counting_executor")) {
+        PyErr_SetString(PyExc_TypeError, "argument must be a counting_executor");
+        return NULL;
+    }
+    _Py_Executor_DependsOn((_PyExecutorObject *)exec, obj);
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+invalidate_executors(PyObject *self, PyObject *obj)
+{
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    _Py_Executors_InvalidateDependency(interp, obj);
+    Py_RETURN_NONE;
+}
+
 static int _pending_callback(void *arg)
 {
     /* we assume the argument is callable object to which we own a reference */
@@ -1565,6 +1591,8 @@ static PyMethodDef module_functions[] = {
     {"get_executor", _PyCFunction_CAST(get_executor),  METH_FASTCALL, NULL},
     {"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
     {"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL},
+    {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
+    {"invalidate_executors", invalidate_executors, METH_O, NULL},
     {"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),
      METH_VARARGS | METH_KEYWORDS},
     {"pending_identify", pending_identify, METH_VARARGS, NULL},

diff --git a/Python/instrumentation.c b/Python/instrumentation.c
@@ -1582,6 +1582,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
     if (code->co_executors != NULL) {
         _PyCode_Clear_Executors(code);
     }
+    _Py_Executors_InvalidateDependency(interp, code);
     int code_len = (int)Py_SIZE(code);
     /* code->_co_firsttraceable >= code_len indicates
      * that no instrumentation can be inserted.
@@ -1803,6 +1804,7 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
         return -1;
     }
     set_global_version(interp, new_version);
+    _Py_Executors_InvalidateAll(interp);
     return instrument_all_executing_code_objects(interp);
 }
 
@@ -1832,6 +1834,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent
         /* Force instrumentation update */
         code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT;
     }
+    _Py_Executors_InvalidateDependency(interp, code);
     if (_Py_Instrument(code, interp)) {
         return -1;
     }