Skip to content

Commit

Permalink
GH-109369: Merge all eval-breaker flags and monitoring version into o…
Browse files Browse the repository at this point in the history
…ne word. (GH-109846)
  • Loading branch information
markshannon authored Oct 4, 2023
1 parent 7c149a7 commit bf4bc36
Show file tree
Hide file tree
Showing 13 changed files with 188 additions and 234 deletions.
2 changes: 1 addition & 1 deletion Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ typedef struct {
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
_PyExecutorArray *co_executors; /* executors from optimizer */ \
_PyCoCached *_co_cached; /* cached co_* attributes */ \
uint64_t _co_instrumentation_version; /* current instrumentation version */ \
uintptr_t _co_instrumentation_version; /* current instrumentation version */ \
_PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \
int _co_firsttraceable; /* index of first traceable instruction */ \
/* Scratch space for extra data relating to the code object. \
Expand Down
33 changes: 33 additions & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,39 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);


/* Bit positions of the individual request flags kept in the low bits of
   interp->ceval.eval_breaker.  These are the values passed as the `bit`
   argument of _Py_set_eval_breaker_bit() and _Py_eval_breaker_bit_is_set().
   Each flag replaces what used to be a separate field (gil_drop_request,
   signals_pending, calls_to_do, async_exc, gc_scheduled). */
#define _PY_GIL_DROP_REQUEST_BIT 0
#define _PY_SIGNALS_PENDING_BIT 1
#define _PY_CALLS_TO_DO_BIT 2
#define _PY_ASYNC_EXCEPTION_BIT 3
#define _PY_GC_SCHEDULED_BIT 4

/* Reserve a few bits for future use */
/* Number of low bits of eval_breaker set aside for flags; the bits above
   them are compared against a code object's _co_instrumentation_version. */
#define _PY_EVAL_EVENTS_BITS 8
/* Mask covering the flag bits.  NOTE: this expression has type `int`, so
   `~_PY_EVAL_EVENTS_MASK` is a negative int that converts to uintptr_t with
   every higher bit set; `eval_breaker & ~_PY_EVAL_EVENTS_MASK` relies on
   that to keep all of the version bits on 64-bit builds.  Making the
   constant `unsigned int` would silently truncate that mask to 32 bits. */
#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1)

/* Atomically set or clear one flag bit of interp->ceval.eval_breaker.
 *
 *   interp  interpreter whose eval_breaker word is updated
 *   bit     bit position to change (e.g. _PY_GC_SCHEDULED_BIT)
 *   set     1 to set the bit, 0 to clear it; any other value is rejected
 *           by an assertion
 *
 * All other bits of the word are preserved.  If the bit already has the
 * requested value no store is performed.
 */
static inline void
_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set)
{
    assert(set == 0 || set == 1);
    /* Widen before shifting: `set << bit` would be evaluated in 32-bit
       arithmetic, which is undefined for bit >= 32 and inconsistent with
       how `mask` is built. */
    uintptr_t to_set = ((uintptr_t)set) << bit;
    uintptr_t mask = ((uintptr_t)1) << bit;
    uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
    if ((old & mask) == to_set) {
        /* Already in the requested state; avoid the atomic write. */
        return;
    }
    /* Not named `new`: this header is also wrapped for C++ inclusion and
       `new` is a C++ keyword. */
    uintptr_t new_value;
    do {
        /* On failure the compare-exchange refreshes `old` with the current
           value, so the replacement is recomputed from up-to-date bits. */
        new_value = (old & ~mask) | to_set;
    } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new_value));
}

/* Report whether flag bit `bit` of interp->ceval.eval_breaker is currently
 * set.  The word is read with a relaxed atomic load.
 *
 *   interp  interpreter whose eval_breaker word is inspected
 *   bit     bit position to test (e.g. _PY_GC_SCHEDULED_BIT)
 *
 * Returns true when the bit is 1, false otherwise.
 */
static inline bool
_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
{
    const uintptr_t flag = ((uintptr_t)1) << bit;
    const uintptr_t current =
        _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker);
    return (current & flag) != 0;
}


#ifdef __cplusplus
}
#endif
Expand Down
19 changes: 4 additions & 15 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ struct _pending_calls {
int busy;
PyThread_type_lock lock;
/* Request for running pending calls. */
_Py_atomic_int calls_to_do;
/* Request for looking at the `async_exc` field of the current
thread state.
Guarded by the GIL. */
int async_exc;
int32_t calls_to_do;
#define NPENDINGCALLS 32
struct _pending_call {
_Py_pending_call_func func;
Expand Down Expand Up @@ -62,11 +58,6 @@ struct _ceval_runtime_state {
int _not_used;
#endif
} perf;
/* Request for checking signals. It is shared by all interpreters (see
bpo-40513). Any thread of any interpreter can receive a signal, but only
the main thread of the main interpreter can handle signals: see
_Py_ThreadCanHandleSignals(). */
_Py_atomic_int signals_pending;
/* Pending calls to be made only on the main thread. */
struct _pending_calls pending_mainthread;
};
Expand All @@ -87,14 +78,12 @@ struct _ceval_state {
* the fast path in the eval loop.
* It is by far the hottest field in this struct and
* should be placed at the beginning. */
_Py_atomic_int eval_breaker;
/* Request for dropping the GIL */
_Py_atomic_int gil_drop_request;
uintptr_t eval_breaker;
/* Avoid false sharing */
int64_t padding[7];
int recursion_limit;
struct _gil_runtime_state *gil;
int own_gil;
/* The GC is ready to be executed */
_Py_atomic_int gc_scheduled;
struct _pending_calls pending;
};

Expand Down
3 changes: 1 addition & 2 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ struct _is {
int _initialized;
int finalizing;

uint64_t monitoring_version;
uint64_t last_restart_version;
uintptr_t last_restart_version;
struct pythreads {
uint64_t next_unique_id;
/* The linked list of threads, newest first. */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The internal eval_breaker and supporting flags, plus the monitoring version,
have been merged into a single atomic integer to speed up checks.
7 changes: 2 additions & 5 deletions Modules/gcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/

#include "Python.h"
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_initconfig.h"
Expand Down Expand Up @@ -2274,11 +2275,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
if (gcstate->collecting == 1) {
return;
}
struct _ceval_state *ceval = &interp->ceval;
if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) {
_Py_atomic_store_relaxed(&ceval->gc_scheduled, 1);
_Py_atomic_store_relaxed(&ceval->eval_breaker, 1);
}
_Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
}

void
Expand Down
5 changes: 2 additions & 3 deletions Modules/signalmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1767,9 +1767,8 @@ PyErr_CheckSignals(void)
Python code to ensure signals are handled. Checking for the GC here
allows long running native code to clean cycles created using the C-API
even if it doesn't run the evaluation loop */
struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
if (_Py_atomic_load_relaxed(&interp_ceval_state->gc_scheduled)) {
_Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
_Py_RunGC(tstate);
}

Expand Down
22 changes: 13 additions & 9 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,12 @@ dummy_func(
inst(RESUME, (--)) {
TIER_ONE_ONLY
assert(frame == tstate->current_frame);
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
uintptr_t global_version =
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((code_version & 255) == 0);
if (code_version != global_version) {
int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
ERROR_IF(err, error);
next_instr--;
Expand All @@ -154,17 +159,16 @@ dummy_func(
DEOPT_IF(_Py_emscripten_signal_clock == 0);
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
#endif
/* Possibly combine these two checks */
DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version);
DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker));
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
DEOPT_IF(eval_breaker != version);
}

inst(INSTRUMENTED_RESUME, (--)) {
/* Possible performance enhancement:
* We need to check the eval breaker anyway, can we
* combine the instrument version check and the eval breaker test?
*/
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
goto error;
}
Expand Down
Loading

0 comments on commit bf4bc36

Please sign in to comment.