From 745139a192806b53ac75dff472ca2c5281de283e Mon Sep 17 00:00:00 2001
From: d-netto
Date: Tue, 3 Sep 2024 13:16:45 -0300
Subject: [PATCH] instrument GC to breakdown times spent in each step of sweeping

---
 base/timing.jl |  3 ++
 src/gc.c       | 97 ++++++++++++++++++++++++++++----------------------
 src/gc.h       |  3 ++
 3 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/base/timing.jl b/base/timing.jl
index bdbb32936b56f..73a3c5dc7d5e5 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -23,6 +23,9 @@ struct GC_Num
     sweep_time          ::Int64
     mark_time           ::Int64
     total_sweep_time    ::Int64
+    total_sweep_page_walk_time ::Int64
+    total_sweep_madvise_time ::Int64
+    total_sweep_free_mallocd_memory_time ::Int64
     total_mark_time     ::Int64
     last_full_sweep     ::Int64
     last_incremental_sweep ::Int64
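
Note (illustrative, not part of the patch): the three new GC_Num fields are cumulative counters filled in by the src/gc.c changes below, in the same way as the existing total_sweep_time. Assuming they are surfaced through Base.gc_num() like the other fields and hold nanoseconds (as the jl_hrtime()-based instrumentation suggests), they can be read from Julia roughly as follows:

    # Sketch: read the cumulative sweep-phase counters added by this patch.
    # Assumes a build with the patch applied; values assumed to be nanoseconds.
    gcnum = Base.gc_num()
    ns_to_ms(t) = t / 1_000_000
    println("total sweep:           ", ns_to_ms(gcnum.total_sweep_time), " ms")
    println("  page walk:           ", ns_to_ms(gcnum.total_sweep_page_walk_time), " ms")
    println("  madvise:             ", ns_to_ms(gcnum.total_sweep_madvise_time), " ms")
    println("  free mallocd memory: ", ns_to_ms(gcnum.total_sweep_free_mallocd_memory_time), " ms")
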
diff --git a/src/gc.c b/src/gc.c
index 4cb48ba72dfe1..dad5768732545 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -1518,8 +1518,11 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
 // sweep over all memory that is being used and not in a pool
 static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 {
+    uint64_t t_free_mallocd_memory_start = jl_hrtime();
     sweep_malloced_arrays();
     sweep_big(ptls);
+    uint64_t t_free_mallocd_memory_end = jl_hrtime();
+    gc_num.total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start;
 }
 
 static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
@@ -1776,58 +1779,63 @@ static void gc_sweep_pool(void)
         }
     }
 
-    // the actual sweeping
-    jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
-    jl_ptls_t ptls = jl_current_task->ptls;
-    gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
-    gc_sweep_pool_parallel(ptls);
-    gc_sweep_wait_for_all();
-
-    // reset half-pages pointers
-    for (int t_i = 0; t_i < n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
-            for (int i = 0; i < JL_GC_N_POOLS; i++) {
-                jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
-                p->newpages = NULL;
+    uint64_t t_page_walk_start = jl_hrtime();
+    {
+        // the actual sweeping
+        jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
+        jl_ptls_t ptls = jl_current_task->ptls;
+        gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
+        gc_sweep_pool_parallel(ptls);
+        gc_sweep_wait_for_all();
+
+        // reset half-pages pointers
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 != NULL) {
+                ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
+                for (int i = 0; i < JL_GC_N_POOLS; i++) {
+                    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
+                    p->newpages = NULL;
+                }
             }
         }
-    }
 
-    // merge free lists
-    for (int t_i = 0; t_i < n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 == NULL) {
-            continue;
-        }
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
-        while (pg != NULL) {
-            jl_gc_pagemeta_t *pg2 = pg->next;
-            if (pg->fl_begin_offset != UINT16_MAX) {
-                char *cur_pg = pg->data;
-                jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
-                jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
-                *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
-                pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
+        // merge free lists
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
+            while (pg != NULL) {
+                jl_gc_pagemeta_t *pg2 = pg->next;
+                if (pg->fl_begin_offset != UINT16_MAX) {
+                    char *cur_pg = pg->data;
+                    jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
+                    jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
+                    *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
+                    pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
+                }
+                pg = pg2;
             }
-            pg = pg2;
         }
-    }
 
-    // null out terminal pointers of free lists
-    for (int t_i = 0; t_i < n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            for (int i = 0; i < JL_GC_N_POOLS; i++) {
-                *pfl[t_i * JL_GC_N_POOLS + i] = NULL;
+        // null out terminal pointers of free lists
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 != NULL) {
+                for (int i = 0; i < JL_GC_N_POOLS; i++) {
+                    *pfl[t_i * JL_GC_N_POOLS + i] = NULL;
+                }
             }
         }
-    }
 
-    // cleanup
-    free(pfl);
-    free(new_gc_allocd_scratch);
+        // cleanup
+        free(pfl);
+        free(new_gc_allocd_scratch);
+    }
+    uint64_t t_page_walk_end = jl_hrtime();
+    gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start;
 
 #ifdef _P64 // only enable concurrent sweeping on 64bit
     // wake thread up to sweep concurrently
@@ -1835,7 +1843,10 @@ static void gc_sweep_pool(void)
         uv_sem_post(&gc_sweep_assists_needed);
     }
     else {
+        uint64_t t_madvise_start = jl_hrtime();
         gc_free_pages();
+        uint64_t t_madvise_end = jl_hrtime();
+        gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start;
     }
 #else
     gc_free_pages();
diff --git a/src/gc.h b/src/gc.h
index b4d421c708547..b06deec9d7238 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -83,6 +83,9 @@ typedef struct {
     uint64_t sweep_time;
    uint64_t mark_time;
     uint64_t total_sweep_time;
+    uint64_t total_sweep_page_walk_time;
+    uint64_t total_sweep_madvise_time;
+    uint64_t total_sweep_free_mallocd_memory_time;
     uint64_t total_mark_time;
     uint64_t last_full_sweep;
     uint64_t last_incremental_sweep;
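
Note (illustrative, not part of the patch): to attribute the sweep time of a single collection to the three instrumented phases, the cumulative counters can be diffed around a forced collection. A sketch, assuming the new fields behave like the existing total_sweep_time (monotonic, nanoseconds); the three phases need not sum exactly to the total, e.g. madvise time is only accumulated on the non-concurrent sweeping path:

    # Sketch: per-collection breakdown of sweep time into the instrumented phases.
    before = Base.gc_num()
    GC.gc(true)   # force a full collection
    after = Base.gc_num()

    sweep        = after.total_sweep_time - before.total_sweep_time
    page_walk    = after.total_sweep_page_walk_time - before.total_sweep_page_walk_time
    madvise      = after.total_sweep_madvise_time - before.total_sweep_madvise_time
    free_mallocd = after.total_sweep_free_mallocd_memory_time - before.total_sweep_free_mallocd_memory_time

    # Share of the sweep spent in each instrumented phase, in percent.
    share(x) = sweep == 0 ? 0.0 : round(100 * x / sweep; digits=1)
    println("page walk ", share(page_walk), "%, madvise ", share(madvise),
            "%, free mallocd memory ", share(free_mallocd), "%")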