Skip to content

Commit

Permalink
optimize remset marking (JuliaLang#52476)
Browse files Browse the repository at this point in the history
Tag the lowest bit of a pointer to indicate that the object is in the remset, and
enqueue remset objects for later processing once the GC threads have woken up,
instead of marking them all sequentially at once.

In principle, this should allow for more parallelism in the mark phase,
though I haven't benchmarked it yet.
  • Loading branch information
d-netto committed Jul 3, 2024
1 parent 2ed98a2 commit e4b3521
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
21 changes: 10 additions & 11 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2618,13 +2618,12 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr);
}

// Enqueue and mark all outgoing references from `new_obj` which have not been marked
// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for
// objects which have been enqueued into the `remset`
FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
int meta_updated)
// Enqueue and mark all outgoing references from `new_obj` which have not been marked yet.
// `_new_obj` has its lowest bit tagged if it's in the remset (in which case we shouldn't update page metadata)
FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj)
{
jl_value_t *new_obj = (jl_value_t *)_new_obj;
int meta_updated = (uintptr_t)_new_obj & GC_REMSET_PTR_TAG;
jl_value_t *new_obj = (jl_value_t *)((uintptr_t)_new_obj & ~(uintptr_t)GC_REMSET_PTR_TAG);
mark_obj: {
#ifdef JL_DEBUG_BUILD
if (new_obj == gc_findval)
Expand Down Expand Up @@ -2935,7 +2934,7 @@ void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
if (__unlikely(new_obj == NULL)) {
return;
}
gc_mark_outrefs(ptls, mq, new_obj, 0);
gc_mark_outrefs(ptls, mq, new_obj);
}
}

Expand Down Expand Up @@ -2984,7 +2983,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
goto steal;
}
mark : {
gc_mark_outrefs(ptls, mq, new_obj, 0);
gc_mark_outrefs(ptls, mq, new_obj);
goto pop;
}
// Note that for the stealing heuristics, we try to
Expand Down Expand Up @@ -3245,9 +3244,9 @@ static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
size_t len = ptls2->heap.last_remset->len;
void **items = ptls2->heap.last_remset->items;
for (size_t i = 0; i < len; i++) {
// Objects in the `remset` are already marked,
// so a `gc_try_claim_and_push` wouldn't work here
gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1);
// Tag the pointer to indicate it's in the remset
jl_value_t *v = (jl_value_t *)((uintptr_t)items[i] | GC_REMSET_PTR_TAG);
gc_ptr_queue_push(&ptls->mark_queue, v);
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ typedef struct _jl_gc_chunk_t {
#define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *`
#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue

// Tag bit OR-ed into a `jl_value_t *` before it is pushed onto the mark queue, marking
// the object as coming from the remset. `gc_queue_remset` sets it; `gc_mark_outrefs`
// reads it and masks it off to recover the real object pointer.
#define GC_REMSET_PTR_TAG (0x1) // lowest bit of `jl_value_t *` is tagged if it's in the remset

// layout for big (>2k) objects

JL_EXTENSION typedef struct _bigval_t {
Expand Down

0 comments on commit e4b3521

Please sign in to comment.