Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cleanup remset logic a bit #55021

Merged
merged 1 commit into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 39 additions & 54 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1908,7 +1908,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
// which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419
uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD);
if (header & GC_OLD) { // write barrier has not been triggered in this object yet
arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
arraylist_push(&ptls->heap.remset, (jl_value_t*)ptr);
ptls->heap.remset_nptr++; // conservative
}
}
Expand Down Expand Up @@ -2012,7 +2012,7 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
{
if (__unlikely((nptr & 0x3) == 0x3)) {
ptls->heap.remset_nptr += nptr >> 2;
arraylist_t *remset = ptls->heap.remset;
arraylist_t *remset = &ptls->heap.remset;
size_t len = remset->len;
if (__unlikely(len >= remset->max)) {
arraylist_push(remset, obj);
Expand Down Expand Up @@ -3188,23 +3188,6 @@ void gc_mark_clean_reclaim_sets(void)
}
}

// Prepare one thread's remembered set for the mark phase.
//
// Swaps the thread's `remset` and `last_remset` pointers, empties the list
// that becomes the current `remset`, zeroes `remset_nptr`, and then sets the
// GC bits of every object recorded in the previous remset (now reachable via
// `last_remset`) to GC_OLD_MARKED.
//
// ptls2: thread-local state whose `heap` remset fields are updated in place.
static void gc_premark(jl_ptls_t ptls2)
{
// Keep a handle on the list that was being filled before the swap; it is
// the one iterated over below.
arraylist_t *remset = ptls2->heap.remset;
ptls2->heap.remset = ptls2->heap.last_remset;
ptls2->heap.last_remset = remset;
// After the swap `heap.remset` points at the other list; reset it together
// with the pointer-count estimate.
ptls2->heap.remset->len = 0;
ptls2->heap.remset_nptr = 0;
// avoid counting remembered objects
// in `perm_scanned_bytes`
size_t len = remset->len;
void **items = remset->items;
for (size_t i = 0; i < len; i++) {
jl_value_t *item = (jl_value_t *)items[i];
// Overwrite the object's GC bits in its tagged header.
jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
}
}

static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
{
jl_task_t *task;
Expand Down Expand Up @@ -3248,14 +3231,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
}
}

static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
{
size_t len = ptls2->heap.last_remset->len;
void **items = ptls2->heap.last_remset->items;
void **items = ptls2->heap.remset.items;
size_t len = ptls2->heap.remset.len;
for (size_t i = 0; i < len; i++) {
// Tag the pointer to indicate it's in the remset
jl_value_t *v = (jl_value_t *)((uintptr_t)items[i] | GC_REMSET_PTR_TAG);
gc_ptr_queue_push(&ptls->mark_queue, v);
void *_v = items[i];
jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED;
jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG);
gc_ptr_queue_push(mq, v);
}
// Don't forget to clear the remset
ptls2->heap.remset.len = 0;
ptls2->heap.remset_nptr = 0;
}

// Sanity check over every entry of `gc_all_tls_states`: for each non-NULL
// thread state, assert that its remembered set holds no entries and that its
// `remset_nptr` counter is zero. NULL slots are skipped.
static void gc_check_all_remsets_are_empty(void)
{
    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
        if (ptls2 == NULL)
            continue; // nothing to verify for an empty slot
        assert(ptls2->heap.remset.len == 0);
        assert(ptls2->heap.remset_nptr == 0);
    }
}

Expand Down Expand Up @@ -3492,15 +3490,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
JL_PROBE_GC_MARK_BEGIN();
{
JL_TIMING(GC, GC_Mark);

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_premark(ptls2);
}

assert(gc_n_threads);
int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
Expand All @@ -3513,17 +3502,18 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
mq_dest = &ptls_dest->mark_queue;
}
if (ptls2 != NULL) {
// 2.1. mark every thread local root
// 1.1. mark every thread local root
gc_queue_thread_local(mq_dest, ptls2);
// 2.2. mark any managed objects in the backtrace buffer
// 1.2. mark any managed objects in the backtrace buffer
// TODO: treat these as roots for gc_heap_snapshot_record
gc_queue_bt_buf(mq_dest, ptls2);
// 2.3. mark every object in the `last_remsets` and `rem_binding`
gc_queue_remset(ptls_dest, ptls2);
// 1.3. mark every object in the remset
gc_queue_remset(mq_dest, ptls2);
}
}
gc_check_all_remsets_are_empty();

// 3. walk roots
// 2. walk roots
gc_mark_roots(mq);
if (gc_cblist_root_scanner) {
gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
Expand All @@ -3533,7 +3523,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_mark_loop_barrier();
gc_mark_clean_reclaim_sets();

// 4. check for objects to finalize
// 3. check for objects to finalize
clear_weak_refs();
// Record the length of the marked list since we need to
// mark the object moved to the marked list from the
Expand Down Expand Up @@ -3591,7 +3581,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_num.total_allocd += gc_num.allocd;
if (!prev_sweep_full)
promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
// 5. next collection decision
// 4. next collection decision
int remset_nptr = 0;
int sweep_full = next_sweep_full;
int recollect = 0;
Expand Down Expand Up @@ -3621,7 +3611,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
promoted_bytes = 0;
}
scanned_bytes = 0;
// 6. start sweeping
// 5. start sweeping
uint64_t start_sweep_time = jl_hrtime();
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
{
Expand Down Expand Up @@ -3755,21 +3745,21 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
if (heap_size > user_max || thrashing)
under_pressure = 1;
// sweeping is over
// 7. if it is a quick sweep, put back the remembered objects in queued state
// 6. if it is a quick sweep, put back the remembered objects in queued state
// so that we don't trigger the barrier again on them.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL)
continue;
if (!sweep_full) {
for (int i = 0; i < ptls2->heap.remset->len; i++) {
void *ptr = ptls2->heap.remset->items[i];
for (int i = 0; i < ptls2->heap.remset.len; i++) {
void *ptr = ptls2->heap.remset.items[i];
jl_astaggedvalue(ptr)->bits.gc = GC_MARKED;
}
}
else {
ptls2->heap.remset->len = 0;
ptls2->heap.remset.len = 0;
}
// free empty GC state for threads that have exited
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
Expand All @@ -3780,10 +3770,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
small_arraylist_free(&heap->weak_refs);
if (heap->live_tasks.len == 0)
small_arraylist_free(&heap->live_tasks);
if (heap->remset->len == 0)
arraylist_free(heap->remset);
if (heap->last_remset->len == 0)
arraylist_free(heap->last_remset);
if (heap->remset.len == 0)
arraylist_free(&heap->remset);
if (ptls2->finalizers.len == 0)
arraylist_free(&ptls2->finalizers);
if (ptls2->sweep_objs.len == 0)
Expand Down Expand Up @@ -3977,10 +3965,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
heap->remset = &heap->_remset[0];
heap->last_remset = &heap->_remset[1];
arraylist_new(heap->remset, 0);
arraylist_new(heap->last_remset, 0);
arraylist_new(&heap->remset, 0);
arraylist_new(&ptls->finalizers, 0);
arraylist_new(&ptls->sweep_objs, 0);

Expand Down
6 changes: 2 additions & 4 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,10 @@ typedef struct {
// variables for tracking big objects
struct _bigval_t *big_objects;

// variables for tracking "remembered set"
arraylist_t _remset[2]; // contains jl_value_t*
// lower bound of the number of pointers inside remembered values
int remset_nptr;
arraylist_t *remset;
arraylist_t *last_remset;
// remembered set
arraylist_t remset;

// variables for allocating objects from pools
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
Expand Down