gh-115103: Delay reuse of mimalloc pages that store PyObjects (#115435)
This implements the delayed reuse of mimalloc pages that contain Python
objects in the free-threaded build.

Allocations of the same size class are grouped in data structures called
pages. These are different from operating system pages. For thread safety, we
want to ensure that memory used to store PyObjects remains valid as long as
there may be concurrent lock-free readers; we therefore delay reusing that
memory for other size classes, reusing it in other heaps, and returning it to
the operating system.

When a mimalloc page becomes empty, instead of freeing it immediately, we tag
it with a QSBR goal and insert it into a per-thread-state linked list of
pages to be freed. When mimalloc needs a fresh page, we process this list and
free any still-empty pages that are now safe to free. Pages waiting to be
freed remain available for allocations of the same size class, and allocating
from such a page prevents it from being freed. There is additional logic to
handle abandoned pages when threads exit.
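
In outline, the lifecycle looks like the following sketch. It is illustrative
only, condensed from _PyMem_mi_page_maybe_free() and _PyMem_mi_heap_collect_qsbr()
in Objects/obmalloc.c below; it is not compilable on its own and omits the
non-QSBR path, abandoned pages, and the early exit when a goal has not yet been
reached.

// 1) A page holding PyObjects becomes empty: record a QSBR goal and queue the
//    page on the per-thread list instead of freeing it right away.
page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);

// 2) The thread later needs a fresh page: walk the list and free every page
//    that is still empty and whose goal has been reached, i.e. no thread can
//    still hold a lock-free reference into its memory.
struct llist_node *node;
llist_for_each_safe(node, &tstate->mimalloc.page_list) {
    mi_page_t *p = llist_data(node, mi_page_t, qsbr_node);
    if (mi_page_all_free(p) && _Py_qsbr_poll(tstate->qsbr, p->qsbr_goal)) {
        _PyMem_mi_page_clear_qsbr(p);  // drop the goal and unlink from the list
        _mi_page_free(p, mi_page_queue_of(p), false);
    }
}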
colesbury authored Mar 6, 2024
1 parent 02ee475 commit c012c8a
Showing 9 changed files with 199 additions and 17 deletions.
9 changes: 8 additions & 1 deletion Include/internal/mimalloc/mimalloc/types.h
@@ -311,6 +311,7 @@ typedef struct mi_page_s {
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized
uint8_t use_qsbr : 1; // delay page freeing using qsbr
uint8_t tag : 4; // tag from the owning heap
uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory

@@ -336,8 +337,13 @@ typedef struct mi_page_s {
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`

#ifdef Py_GIL_DISABLED
struct llist_node qsbr_node;
uint64_t qsbr_goal;
#endif

// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8
#if MI_INTPTR_SIZE==8 && !defined(Py_GIL_DISABLED)
uintptr_t padding[1];
#endif
} mi_page_t;
@@ -555,6 +561,7 @@ struct mi_heap_s {
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
uint8_t tag; // custom identifier for this heap
uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory
bool page_use_qsbr; // should freeing pages be delayed using QSBR
};


1 change: 1 addition & 0 deletions Include/internal/pycore_mimalloc.h
@@ -48,6 +48,7 @@ struct _mimalloc_thread_state {
mi_heap_t *current_object_heap;
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
mi_tld_t tld;
struct llist_node page_list;
};
#endif

15 changes: 15 additions & 0 deletions Include/internal/pycore_qsbr.h
@@ -29,6 +29,12 @@ extern "C" {
#define QSBR_INITIAL 1
#define QSBR_INCR 2

// Wrap-around safe comparison. This is a holdover from the FreeBSD
// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
// sequence numbers, so wrap-around is unlikely.
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

struct _qsbr_shared;
struct _PyThreadStateImpl; // forward declare to avoid circular dependency

@@ -89,6 +95,15 @@ _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
_Py_atomic_store_uint64_release(&qsbr->seq, seq);
}

// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
// but does not perform a scan of threads.
static inline bool
_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
{
uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
return QSBR_LEQ(goal, rd_seq);
}

// Advance the write sequence and return the new goal. This should be called
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
// determine when it is safe to reclaim (free) the memory.
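As a side note on the QSBR_LT/QSBR_LEQ macros added above: subtracting the
unsigned sequence numbers and interpreting the difference as a signed value
yields an ordering that survives wrap-around, as long as the two values are
within 2^63 of each other. A small, self-contained illustration (not part of
the commit):

#include <assert.h>
#include <stdint.h>

#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

int main(void)
{
    uint64_t older = UINT64_MAX - 1;  // a sequence number just before wrap-around
    uint64_t newer = older + 4;       // wraps around to 2

    // A plain < would claim newer < older; the signed-difference form does not.
    assert(QSBR_LT(older, newer));
    assert(!QSBR_LT(newer, older));
    assert(QSBR_LEQ(older, older));
    return 0;
}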
8 changes: 7 additions & 1 deletion Objects/mimalloc/heap.c
@@ -98,7 +98,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
if (mi_page_all_free(page)) {
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
if (!freed && collect == MI_ABANDON) {
_mi_page_abandon(page, pq);
}
}
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
@@ -153,6 +156,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect retired pages
_mi_heap_collect_retired(heap, force);

// free pages that were delayed with QSBR
_PyMem_mi_heap_collect_qsbr(heap);

// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
35 changes: 33 additions & 2 deletions Objects/mimalloc/page.c
@@ -225,6 +225,9 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {

// and the local free list
if (page->local_free != NULL) {
// any previous QSBR goals are no longer valid because we reused the page
_PyMem_mi_page_clear_qsbr(page);

if mi_likely(page->free == NULL) {
// usual case
page->free = page->local_free;
@@ -267,6 +270,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_push(heap, pq, page);
_PyMem_mi_page_reclaimed(page);
mi_assert_expensive(_mi_page_is_valid(page));
}

@@ -383,6 +387,13 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {

mi_heap_t* pheap = mi_page_heap(page);

#ifdef Py_GIL_DISABLED
if (page->qsbr_node.next != NULL) {
// remove from QSBR queue, but keep the goal
llist_remove(&page->qsbr_node);
}
#endif

// remove from our page list
mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);
@@ -417,6 +428,11 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {

mi_heap_t* heap = mi_page_heap(page);

#ifdef Py_GIL_DISABLED
mi_assert_internal(page->qsbr_goal == 0);
mi_assert_internal(page->qsbr_node.next == NULL);
#endif

// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &heap->tld->segments;
@@ -444,6 +460,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {

mi_page_set_has_aligned(page, false);

// any previous QSBR goals are no longer valid because we reused the page
_PyMem_mi_page_clear_qsbr(page);

// don't retire too often..
// (or we end up retiring and re-allocating most of the time)
// NOTE: refine this more: we should not retire if this
@@ -465,7 +484,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
return; // dont't free after all
}
}
_mi_page_free(page, pq, false);
_PyMem_mi_page_maybe_free(page, pq, false);
}

// free retired pages: we don't need to look at the entire queues
@@ -480,7 +499,10 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
#ifdef Py_GIL_DISABLED
mi_assert_internal(page->qsbr_goal == 0);
#endif
_PyMem_mi_page_maybe_free(page, pq, force);
}
else {
// keep retired, update min/max
@@ -661,6 +683,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
// set fields
mi_page_set_heap(page, heap);
page->tag = heap->tag;
page->use_qsbr = heap->page_use_qsbr;
page->debug_offset = heap->debug_offset;
page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
size_t page_size;
@@ -691,6 +714,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->xthread_free == 0);
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
#ifdef Py_GIL_DISABLED
mi_assert_internal(page->qsbr_goal == 0);
mi_assert_internal(page->qsbr_node.next == NULL);
#endif
mi_assert_internal(page->retire_expire == 0);
mi_assert_internal(!mi_page_has_aligned(page));
#if (MI_PADDING || MI_ENCODE_FREELIST)
@@ -750,6 +777,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
mi_heap_stat_counter_increase(heap, searches, count);

if (page == NULL) {
_PyMem_mi_heap_collect_qsbr(heap); // some pages might be safe to free now
_mi_heap_collect_retired(heap, false); // perhaps make a page available?
page = mi_page_fresh(heap, pq);
if (page == NULL && first_try) {
@@ -760,6 +788,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
else {
mi_assert(pq->first == page);
page->retire_expire = 0;
_PyMem_mi_page_clear_qsbr(page);
}
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
return page;
@@ -785,6 +814,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {

if (mi_page_immediate_available(page)) {
page->retire_expire = 0;
_PyMem_mi_page_clear_qsbr(page);
return page; // fast path
}
}
@@ -878,6 +908,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
return NULL;
}
else {
_PyMem_mi_heap_collect_qsbr(heap);
return mi_large_huge_page_alloc(heap,size,huge_alignment);
}
}
16 changes: 13 additions & 3 deletions Objects/mimalloc/segment.c
@@ -982,6 +982,10 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
mi_assert_internal(mi_page_all_free(page));
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment->used > 0);
#ifdef Py_GIL_DISABLED
mi_assert_internal(page->qsbr_goal == 0);
mi_assert_internal(page->qsbr_node.next == NULL);
#endif

size_t inuse = page->capacity * mi_page_block_size(page);
_mi_stat_decrease(&tld->stats->page_committed, inuse);
@@ -1270,10 +1274,13 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
// ensure used count is up to date and collect potential concurrent frees
mi_page_t* const page = mi_slice_to_page(slice);
_mi_page_free_collect(page, false);
if (mi_page_all_free(page)) {
if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
// if this page is all free now, free it without adding to any queues (yet)
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
#ifdef Py_GIL_DISABLED
page->qsbr_goal = 0;
#endif
segment->abandoned--;
slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
mi_assert_internal(!mi_slice_is_used(slice));
@@ -1344,15 +1351,18 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
mi_page_set_heap(page, target_heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
// if everything free by now, free the page
#ifdef Py_GIL_DISABLED
page->qsbr_goal = 0;
#endif
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(target_heap, page);
if (requested_block_size == page->xblock_size && mi_page_has_any_available(page) &&
heap == target_heap) {
requested_block_size <= MI_MEDIUM_OBJ_SIZE_MAX && heap == target_heap) {
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
113 changes: 113 additions & 0 deletions Objects/obmalloc.c
@@ -12,6 +12,12 @@
#include <stdlib.h> // malloc()
#include <stdbool.h>
#ifdef WITH_MIMALLOC
// Forward declarations of functions used in our mimalloc modifications
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
# include "pycore_mimalloc.h"
# include "mimalloc/static.c"
# include "mimalloc/internal.h" // for stats
@@ -86,6 +92,113 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)

#ifdef WITH_MIMALLOC

static void
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
{
#ifdef Py_GIL_DISABLED
// Clear the QSBR goal and remove the page from the QSBR linked list.
page->qsbr_goal = 0;
if (page->qsbr_node.next != NULL) {
llist_remove(&page->qsbr_node);
}
#endif
}

// Check if an empty, newly reclaimed page is safe to free now.
static bool
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
{
assert(mi_page_all_free(page));
#ifdef Py_GIL_DISABLED
assert(page->qsbr_node.next == NULL);
if (page->use_qsbr && page->qsbr_goal != 0) {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
if (tstate == NULL) {
return false;
}
return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
}
#endif
return true;

}

static bool
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
{
#ifdef Py_GIL_DISABLED
assert(mi_page_all_free(page));
if (page->use_qsbr) {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
_PyMem_mi_page_clear_qsbr(page);
_mi_page_free(page, pq, force);
return true;
}

_PyMem_mi_page_clear_qsbr(page);
page->retire_expire = 0;
page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
return false;
}
#endif
_mi_page_free(page, pq, force);
return true;
}

static void
_PyMem_mi_page_reclaimed(mi_page_t *page)
{
#ifdef Py_GIL_DISABLED
assert(page->qsbr_node.next == NULL);
if (page->qsbr_goal != 0) {
if (mi_page_all_free(page)) {
assert(page->qsbr_node.next == NULL);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
page->retire_expire = 0;
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
}
else {
page->qsbr_goal = 0;
}
}
#endif
}

static void
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
{
#ifdef Py_GIL_DISABLED
if (!heap->page_use_qsbr) {
return;
}

_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
struct llist_node *head = &tstate->mimalloc.page_list;
if (llist_empty(head)) {
return;
}

struct llist_node *node;
llist_for_each_safe(node, head) {
mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
if (!mi_page_all_free(page)) {
// We allocated from this page some point after the delayed free
_PyMem_mi_page_clear_qsbr(page);
continue;
}

if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
return;
}

_PyMem_mi_page_clear_qsbr(page);
_mi_page_free(page, mi_page_queue_of(page), false);
}
#endif
}

void *
_PyMem_MiMalloc(void *ctx, size_t size)
{