Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Instrument GC to break down the time spent in each step of sweeping #176

Merged
merged 1 commit into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ struct GC_Num
sweep_time ::Int64
mark_time ::Int64
total_sweep_time ::Int64
total_sweep_page_walk_time ::Int64
total_sweep_madvise_time ::Int64
total_sweep_free_mallocd_memory_time ::Int64
total_mark_time ::Int64
last_full_sweep ::Int64
last_incremental_sweep ::Int64
Expand Down
97 changes: 54 additions & 43 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1518,8 +1518,11 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
// Sweep all in-use memory that does not live in a pool: first the malloc'd
// arrays, then the big objects belonging to `ptls`. The jl_hrtime() delta
// measured around both calls is accumulated into
// gc_num.total_sweep_free_mallocd_memory_time.
// `sweep_full` is accepted but not consulted by this function.
static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
{
    const uint64_t sweep_begin = jl_hrtime();

    sweep_malloced_arrays();
    sweep_big(ptls);

    // Take the closing timestamp only after both sweeps have returned.
    const uint64_t elapsed = jl_hrtime() - sweep_begin;
    gc_num.total_sweep_free_mallocd_memory_time += elapsed;
}

static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -1776,66 +1779,74 @@ static void gc_sweep_pool(void)
}
}

// the actual sweeping
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
jl_ptls_t ptls = jl_current_task->ptls;
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
gc_sweep_pool_parallel(ptls);
gc_sweep_wait_for_all();

// reset half-pages pointers
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
for (int i = 0; i < JL_GC_N_POOLS; i++) {
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
p->newpages = NULL;
uint64_t t_page_walk_start = jl_hrtime();
{
// the actual sweeping
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
jl_ptls_t ptls = jl_current_task->ptls;
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
gc_sweep_pool_parallel(ptls);
gc_sweep_wait_for_all();

// reset half-pages pointers
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
for (int i = 0; i < JL_GC_N_POOLS; i++) {
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
p->newpages = NULL;
}
}
}
}

// merge free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL) {
continue;
}
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
jl_gc_pagemeta_t *pg2 = pg->next;
if (pg->fl_begin_offset != UINT16_MAX) {
char *cur_pg = pg->data;
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
// merge free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL) {
continue;
}
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
jl_gc_pagemeta_t *pg2 = pg->next;
if (pg->fl_begin_offset != UINT16_MAX) {
char *cur_pg = pg->data;
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
}
pg = pg2;
}
pg = pg2;
}
}

// null out terminal pointers of free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
for (int i = 0; i < JL_GC_N_POOLS; i++) {
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
// null out terminal pointers of free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
for (int i = 0; i < JL_GC_N_POOLS; i++) {
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
}
}
}
}

// cleanup
free(pfl);
free(new_gc_allocd_scratch);
// cleanup
free(pfl);
free(new_gc_allocd_scratch);
}
uint64_t t_page_walk_end = jl_hrtime();
gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start;

#ifdef _P64 // only enable concurrent sweeping on 64bit
// wake thread up to sweep concurrently
if (jl_n_sweepthreads > 0) {
uv_sem_post(&gc_sweep_assists_needed);
}
else {
uint64_t t_madvise_start = jl_hrtime();
gc_free_pages();
uint64_t t_madvise_end = jl_hrtime();
gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start;
}
#else
gc_free_pages();
Expand Down
3 changes: 3 additions & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ typedef struct {
uint64_t sweep_time;
uint64_t mark_time;
uint64_t total_sweep_time;
uint64_t total_sweep_page_walk_time;
uint64_t total_sweep_madvise_time;
uint64_t total_sweep_free_mallocd_memory_time;
uint64_t total_mark_time;
uint64_t last_full_sweep;
uint64_t last_incremental_sweep;
Expand Down
Loading