Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

gh-112532: Use separate mimalloc heaps for GC objects #113263

Merged
merged 3 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/internal/mimalloc/mimalloc/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);

// os.c
void _mi_os_init(void); // called from process init
Expand Down Expand Up @@ -170,6 +171,7 @@ size_t _mi_bin_size(uint8_t bin); // for stats
uint8_t _mi_bin(size_t size); // for stats

// "heap.c"
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id);
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
Expand Down
28 changes: 28 additions & 0 deletions Include/internal/pycore_mimalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,39 @@
# error "pycore_mimalloc.h must be included before mimalloc.h"
#endif

typedef enum {
_Py_MIMALLOC_HEAP_MEM = 0, // PyMem_Malloc() and friends
_Py_MIMALLOC_HEAP_OBJECT = 1, // non-GC objects
_Py_MIMALLOC_HEAP_GC = 2, // GC objects without pre-header
_Py_MIMALLOC_HEAP_GC_PRE = 3, // GC objects with pre-header
_Py_MIMALLOC_HEAP_COUNT
} _Py_mimalloc_heap_id;

#include "pycore_pymem.h"

#ifdef WITH_MIMALLOC
#define MI_DEBUG_UNINIT PYMEM_CLEANBYTE
#define MI_DEBUG_FREED PYMEM_DEADBYTE
#define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE
#ifdef Py_DEBUG
# define MI_DEBUG 1
#else
# define MI_DEBUG 0
#endif

#include "mimalloc.h"
#include "mimalloc/types.h"
#include "mimalloc/internal.h"
#endif

struct _mimalloc_thread_state {
#ifdef Py_GIL_DISABLED
mi_heap_t *current_object_heap;
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
mi_tld_t tld;
#else
char _unused; // empty structs are not allowed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like this is only used when Py_GIL_DISABLED is defined, so maybe just not define the struct at all if the GIL is enabled?

#endif
};

#endif // Py_INTERNAL_MIMALLOC_H
1 change: 1 addition & 0 deletions Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ extern PyThreadState * _PyThreadState_New(
int whence);
extern void _PyThreadState_Bind(PyThreadState *tstate);
extern void _PyThreadState_DeleteExcept(PyThreadState *tstate);
extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate);

// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate);
Expand Down
7 changes: 6 additions & 1 deletion Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_mimalloc.h" // struct _mimalloc_thread_state


// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
// PyThreadState fields are exposed as part of the C API, although most fields
Expand All @@ -16,7 +18,10 @@ typedef struct _PyThreadStateImpl {
// semi-public fields are in PyThreadState.
PyThreadState base;

// TODO: add private fields here
#ifdef Py_GIL_DISABLED
struct _mimalloc_thread_state mimalloc;
#endif

} _PyThreadStateImpl;


Expand Down
29 changes: 21 additions & 8 deletions Objects/mimalloc/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,17 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
const bool force = collect >= MI_FORCE;
_mi_deferred_free(heap, force);

// gh-112532: we may be called from a thread that is not the owner of the heap
bool is_main_thread = _mi_is_main_thread() && heap->thread_id == _mi_thread_id();

// note: never reclaim on collect but leave it to threads that need storage to reclaim
const bool force_main =
#ifdef NDEBUG
collect == MI_FORCE
#else
collect >= MI_FORCE
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim;
&& is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim;

if (force_main) {
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
Expand Down Expand Up @@ -164,7 +167,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
}

// collect regions on program-exit (or shared library unload)
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
if (force && is_main_thread && mi_heap_is_backing(heap)) {
_mi_thread_data_collect(); // collect thread data cache
_mi_arena_collect(true /* force purge */, &heap->tld->stats);
}
Expand Down Expand Up @@ -206,18 +209,28 @@ mi_heap_t* mi_heap_get_backing(void) {
return bheap;
}

mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
// Initialize `heap` in place as a fresh, empty heap owned by the calling
// thread, attached to thread-local data `tld` and restricted to allocating
// from `arena_id` (pass _mi_arena_id_none() for no restriction).
// Works for both the backing heap and additional per-thread heaps.
void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id)
{
// Start from the canonical empty-heap template.
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = tld;
heap->thread_id = _mi_thread_id();
heap->arena_id = arena_id;
// The backing heap seeds its RNG from scratch; any other heap splits its
// RNG state off the backing heap's, avoiding a full re-seed per heap.
if (heap == tld->heap_backing) {
_mi_random_init(&heap->random);
}
else {
_mi_random_split(&tld->heap_backing->random, &heap->random);
}
// `| 1` forces the cookie odd, so it is never zero.
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
}

mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
_mi_heap_init_ex(heap, bheap->tld, arena_id);
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
// push on the thread local heaps list
heap->next = heap->tld->heaps;
Expand Down
24 changes: 10 additions & 14 deletions Objects/mimalloc/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,24 +297,20 @@ static bool _mi_heap_init(void) {
mi_thread_data_t* td = mi_thread_data_zalloc();
if (td == NULL) return false;

mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
_mi_tld_init(&td->tld, &td->heap);
_mi_heap_init_ex(&td->heap, &td->tld, _mi_arena_id_none());
_mi_heap_set_default_direct(&td->heap);
}
return false;
}

void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
_mi_random_init(&heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
heap->tld = tld;
tld->heap_backing = heap;
tld->heaps = heap;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats;
_mi_heap_set_default_direct(heap);
}
return false;
tld->heap_backing = bheap;
tld->heaps = bheap;
}

// Free the thread local default heap (called from `mi_thread_done`)
Expand Down
36 changes: 36 additions & 0 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,37 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
/* mimalloc backend for PyMem_Malloc(). */
void *
_PyMem_MiMalloc(void *ctx, size_t size)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: allocate from this thread's dedicated "mem"
       heap instead of the global mimalloc heap. */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_malloc(&ts->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM], size);
#else
    return mi_malloc(size);
#endif
}

/* mimalloc backend for PyMem_Calloc(). */
void *
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: zeroed allocation from the per-thread
       "mem" heap. */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_calloc(&ts->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM],
                          nelem, elsize);
#else
    return mi_calloc(nelem, elsize);
#endif
}

/* mimalloc backend for PyMem_Realloc(). */
void *
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: reallocate via the per-thread "mem" heap. */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_realloc(&ts->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM],
                           ptr, size);
#else
    return mi_realloc(ptr, size);
#endif
}

void
Expand All @@ -112,20 +130,38 @@ _PyMem_MiFree(void *ctx, void *ptr)
/* mimalloc backend for PyObject_Malloc(). */
void *
_PyObject_MiMalloc(void *ctx, size_t nbytes)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: allocate from whichever object heap is
       currently selected in the thread state (the selection is switched
       for GC-tracked allocations). */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_malloc(ts->mimalloc.current_object_heap, nbytes);
#else
    return mi_malloc(nbytes);
#endif
}

/* mimalloc backend for PyObject_Calloc(). */
void *
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: zeroed allocation from the thread's currently
       selected object heap. */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_calloc(ts->mimalloc.current_object_heap, nelem, elsize);
#else
    return mi_calloc(nelem, elsize);
#endif
}


/* mimalloc backend for PyObject_Realloc(). */
void *
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
{
#ifdef Py_GIL_DISABLED
    /* Free-threaded build: reallocate via the thread's currently selected
       object heap. */
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
    return mi_heap_realloc(ts->mimalloc.current_object_heap, ptr, nbytes);
#else
    return mi_realloc(ptr, nbytes);
#endif
}

void
Expand Down
4 changes: 4 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1794,6 +1794,10 @@ finalize_interp_clear(PyThreadState *tstate)
}

finalize_interp_types(tstate->interp);

/* finalize_interp_types may allocate Python objects so we may need to
abandon mimalloc segments again */
_PyThreadState_ClearMimallocHeaps(tstate);
}


Expand Down
55 changes: 55 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ tstate_is_bound(PyThreadState *tstate)
static void bind_gilstate_tstate(PyThreadState *);
static void unbind_gilstate_tstate(PyThreadState *);

static void tstate_mimalloc_bind(PyThreadState *);

static void
bind_tstate(PyThreadState *tstate)
{
Expand All @@ -256,6 +258,9 @@ bind_tstate(PyThreadState *tstate)
tstate->native_thread_id = PyThread_get_thread_native_id();
#endif

// mimalloc state needs to be initialized from the active thread.
tstate_mimalloc_bind(tstate);

tstate->_status.bound = 1;
}

Expand Down Expand Up @@ -1533,6 +1538,8 @@ PyThreadState_Clear(PyThreadState *tstate)
tstate->on_delete(tstate->on_delete_data);
}

_PyThreadState_ClearMimallocHeaps(tstate);

tstate->_status.cleared = 1;

// XXX Call _PyThreadStateSwap(runtime, NULL) here if "current".
Expand Down Expand Up @@ -2495,3 +2502,51 @@ _PyThreadState_MustExit(PyThreadState *tstate)
}
return 1;
}

/********************/
/* mimalloc support */
/********************/

static void
tstate_mimalloc_bind(PyThreadState *tstate)
{
#ifdef Py_GIL_DISABLED
struct _mimalloc_thread_state *mts = &((_PyThreadStateImpl*)tstate)->mimalloc;

// Initialize the mimalloc thread state. This must be called from the
// same thread that will use the thread state. The "mem" heap doubles as
// the "backing" heap.
mi_tld_t *tld = &mts->tld;
_mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);

// Initialize each heap
for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
_mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none());
}

// By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this when looking at some of the later commits in your branch, but gc_alloc calls PyObject_Malloc. Is there a reason we can't add a PyGCObject_Malloc instead of doing this weird heap switching? It would presumably mean an expansion of the PyMemAllocatorDomain but it seems like that would be okay?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vstinner What do you think about adding a new domain for this vs having some oddness around allocating GC objects? We probably actually need 2 domains though for objects w/ and w/o pre-headers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some context: all PyObject allocations go through PyObject_Malloc, but we need to use different mimalloc heaps for non-GC objects, GC objects, and GC objects with pre-headers. As written, we'll do this by modifying the mi_heap_t *current_object_heap field. But you could also do this by having a different memory domain for each heap. Something like PYMEM_DOMAIN_OBJ, PYMEM_DOMAIN_GC, PYMEM_DOMAIN_GC_PRE.

The advantage of adding new memory domains is that it is more explicit and cleaner.

The disadvantage is that code (including extensions) that uses PyMem_SetAllocator to intercept allocations will likely need to be modified to handle the new domains.

// _PyObject_GC_New() and similar functions temporarily override this to
// use one of the GC heaps.
mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT];
#endif
}

/* Abandon all mimalloc segments owned by `tstate`'s thread so their
   memory can later be reclaimed by other threads.  No-op in the
   GIL-enabled build. */
void
_PyThreadState_ClearMimallocHeaps(PyThreadState *tstate)
{
#ifdef Py_GIL_DISABLED
    if (!tstate->_status.bound) {
        // Heaps are set up when the thread state is bound; if that never
        // happened there is nothing to tear down.
        return;
    }

    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    // Push every segment still held by this thread onto the shared
    // abandoned pool.  Doing so before the thread state is destroyed
    // keeps the objects in those segments visible to the GC.
    for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
        _mi_heap_collect_abandon(&ts->mimalloc.heaps[i]);
    }
#endif
}
Loading