From 189ad0849f5459b3871c79fd7fddd10d86c301a8 Mon Sep 17 00:00:00 2001 From: d-netto Date: Wed, 4 Oct 2023 13:04:06 -0300 Subject: [PATCH] revert membalancer heuristics, but keep per-page accounting --- src/gc.c | 146 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 69 deletions(-) diff --git a/src/gc.c b/src/gc.c index 190b9810010e9..cb920e91429bc 100644 --- a/src/gc.c +++ b/src/gc.c @@ -682,28 +682,22 @@ static int64_t last_gc_total_bytes = 0; #ifdef _P64 typedef uint64_t memsize_t; static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 1250000000UL; static size_t total_mem; // We expose this to the user/ci as jl_gc_set_max_memory static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 500000000UL; // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures. #define MAX32HEAP 1536 * 1024 * 1024 static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif -// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323 -static uint64_t old_pause_time = 0; -static uint64_t old_mut_time = 0; -static uint64_t old_heap_size = 0; -static uint64_t old_alloc_diff = 0; -static uint64_t old_freed_diff = 0; -static uint64_t gc_end_time = 0; -static int thrash_counter = 0; -static int thrashing = 0; // global variables for GC stats +static uint64_t gc_end_time = 0; // Resetting the object to a young object, this is used when marking the // finalizer list to collect them the next time because the object is very @@ -3211,8 +3205,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) jl_gc_markqueue_t *mq = &ptls->mark_queue; uint64_t gc_start_time = jl_hrtime(); - uint64_t mutator_time = gc_start_time - gc_end_time; - uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); int64_t last_perm_scanned_bytes = perm_scanned_bytes; uint64_t start_mark_time = jl_hrtime(); JL_PROBE_GC_MARK_BEGIN(); @@ -3304,11 +3296,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; gc_settime_postmark_end(); + int64_t actual_allocd = gc_num.allocd; // marking is over // Flush everything in mark cache gc_sync_all_caches_nolock(ptls); - + int64_t live_sz_ub = live_bytes + actual_allocd; + int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; + int64_t estimate_freed = live_sz_ub - live_sz_est; gc_verify(ptls); gc_stats_all_pool(); @@ -3319,21 +3314,50 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; // 5. next collection decision - int remset_nptr = 0; - int sweep_full = next_sweep_full; - int recollect = 0; + int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10)); + int nptr = 0; assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - remset_nptr += ptls2->heap.remset_nptr; + if (ptls2 == NULL) + continue; + nptr += ptls2->heap.remset_nptr; } - (void)remset_nptr; //Use this information for something? + // many pointers in the intergen frontier => "quick" mark is not quick + int large_frontier = nptr*sizeof(void*) >= default_collect_interval; + int sweep_full = 0; + int recollect = 0; + + // update heuristics only if this GC was automatically triggered + if (collection == JL_GC_AUTO) { + if (large_frontier) { + sweep_full = 1; + gc_num.interval = last_long_collect_interval; + } + if (not_freed_enough || large_frontier) { + gc_num.interval = gc_num.interval * 2; + } + + size_t maxmem = 0; +#ifdef _P64 + // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2 + maxmem = total_mem / gc_n_threads / 2; +#endif + if (maxmem < max_collect_interval) + maxmem = max_collect_interval; + if (gc_num.interval > maxmem) { + sweep_full = 1; + gc_num.interval = maxmem; + } + } // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. + if (live_bytes > max_total_memory) { + sweep_full = 1; + } if (gc_sweep_always_full) { sweep_full = 1; } @@ -3346,6 +3370,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // on the first collection after sweep_full, and the current scan perm_scanned_bytes = 0; promoted_bytes = 0; + last_long_collect_interval = gc_num.interval; } scanned_bytes = 0; pool_live_bytes = 0; @@ -3387,56 +3412,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.last_incremental_sweep = gc_end_time; } - size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); - double target_allocs = 0.0; - double min_interval = default_collect_interval; - if (collection == JL_GC_AUTO) { - uint64_t alloc_diff = before_free_heap_size - old_heap_size; - uint64_t freed_diff = before_free_heap_size - heap_size; - double alloc_smooth_factor = 0.95; - double collect_smooth_factor = 0.5; - double tuning_factor = 0.03; - double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); - double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator - double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); - double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); - old_alloc_diff = alloc_diff; - old_mut_time = mutator_time; - old_freed_diff = freed_diff; - old_pause_time = pause; - old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC - if (gc_time > alloc_time * 95 && !(thrash_counter < 4)) - thrash_counter += 1; - else if (thrash_counter > 0) - thrash_counter -= 1; - if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) { - double alloc_rate = alloc_mem/alloc_time; - double gc_rate = gc_mem/gc_time; - target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval - } - } - if (thrashing == 0 && thrash_counter >= 3) - thrashing = 1; - else if (thrashing == 1 && thrash_counter <= 2) - thrashing = 0; // maybe we should report this to the user or error out? - - int bad_result = (target_allocs*min_interval + heap_size) > 2 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target); // Don't follow through on a bad decision - if (target_allocs == 0.0 || thrashing || bad_result) // If we are thrashing go back to default - target_allocs = 2*sqrt((double)heap_size/min_interval); - uint64_t target_heap = (uint64_t)target_allocs*min_interval + heap_size; - if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die - target_heap = max_total_memory; - else if (target_heap < default_collect_interval) - target_heap = default_collect_interval; - jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap); - - double old_ratio = (double)promoted_bytes/(double)heap_size; - if (heap_size > max_total_memory * 0.8 || old_ratio > 0.15) - next_sweep_full = 1; - else - next_sweep_full = 0; - if (heap_size > max_total_memory * 0.8 || thrashing) - under_pressure = 1; // sweeping is over // 7. if it is a quick sweep, put back the remembered objects in queued state // so that we don't trigger the barrier again on them. @@ -3478,7 +3453,40 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) estimate_freed, sweep_full); gc_num.full_sweep += sweep_full; last_live_bytes = live_bytes; - live_bytes += -gc_num.freed + gc_num.allocd; + live_bytes = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); + + if (collection == JL_GC_AUTO) { + //If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster + if (!not_freed_enough || large_frontier) { + int64_t tot = 2 * (live_bytes + actual_allocd) / 3; + if (gc_num.interval > tot) { + gc_num.interval = tot; + last_long_collect_interval = tot; + } + // If the current interval is larger than half the live data decrease the interval + } + else { + int64_t half = (live_bytes / 2); + if (gc_num.interval > half) + gc_num.interval = half; + } + + // But never go below default + if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval; + } + + if (gc_num.interval + live_bytes > max_total_memory) { + if (live_bytes < max_total_memory) { + gc_num.interval = max_total_memory - live_bytes; + last_long_collect_interval = max_total_memory - live_bytes; + } + else { + // We can't stay under our goal so let's go back to + // the minimum interval and hope things get better + gc_num.interval = default_collect_interval; + } + } + jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed); gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed,