Skip to content

Commit

Permalink
fix MemBalancer smoothing
Browse files Browse the repository at this point in the history
  • Loading branch information
vtjnash committed Nov 16, 2023
1 parent b1c8e12 commit 96e70e6
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 29 deletions.
36 changes: 31 additions & 5 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -850,11 +850,11 @@ void gc_time_pool_end(int sweep_full)
double sweep_speed = sweep_gb / sweep_pool_sec;
jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s "
"(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, "
"%" PRId64 " freed with %" PRId64 " lazily) %s\n",
"%" PRId64 " freed) %s\n",
sweep_pool_sec * 1000, sweep_speed,
(total_pages ? ((double)skipped_pages * 100) / total_pages : 0),
total_pages, total_pages - skipped_pages,
freed_pages, lazy_freed_pages,
freed_pages,
sweep_full ? "full" : "quick");
}

Expand Down Expand Up @@ -946,12 +946,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB "
"(freed %" PRId64 " kB EST %" PRId64 " kB "
"[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") "
"(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n",
"(%.2f ms in post_mark) %s\n",
jl_ns2ms(sweep_pause), live_bytes / 1024,
gc_num.freed / 1024, estimate_freed / 1024,
gc_num.freed - estimate_freed, pct, gc_num.allocd / 1024,
jl_ns2ms(gc_postmark_end - gc_premark_end),
sweep_full ? "full" : "quick", -gc_num.allocd / 1024);
sweep_full ? "full" : "quick");
}

void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
Expand All @@ -971,11 +971,37 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64
" live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %"
PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64
"ms sweep time = %" PRIu64 "ms \n",
"ms sweep time = %" PRIu64 "ms\n",
end, freed, live/1024/1024,
interval/1024/1024, pause/1000000, ttsp,
mark/1000000,sweep/1000000);
}

// Print a single "Estimates:" line describing the GC pacing heuristics.
//
// Every (old_*, value) pair is printed as `name=<old_*><unit> (<value>)`, with
// both numbers scaled the same way:
//   - old_alloc_diff / alloc_mem and old_freed_diff / gc_mem: bytes, shown in kB
//   - old_nongc_time / nongc_time, old_mut_time / alloc_time and
//     old_pause_time / gc_time: divided by 1000 before printing; the inputs are
//     expected to be nanoseconds, so the printed figures are microseconds
//   - current_heap / target_heap: bytes, shown in MB
// thrash_counter is printed unscaled.
//
// The time fields were previously labelled "ns" even though the values had
// already been divided by 1000; the labels now say "us" to match what is printed.
void gc_heuristics_summary(
        uint64_t old_alloc_diff, uint64_t alloc_mem,
        uint64_t old_nongc_time, uint64_t nongc_time,
        uint64_t old_mut_time, uint64_t alloc_time,
        uint64_t old_freed_diff, uint64_t gc_mem,
        uint64_t old_pause_time, uint64_t gc_time,
        int thrash_counter,
        uint64_t current_heap, uint64_t target_heap)
{
    jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")"
                   " nongc_time=%" PRIu64 "us (%" PRIu64 ")"
                   " mut_time=%" PRIu64 "us (%" PRIu64 ")"
                   " freed_diff=%" PRIu64 "kB (%" PRIu64 ")"
                   " pause_time=%" PRIu64 "us (%" PRIu64 ")"
                   " thrash_counter=%d"
                   " current_heap=%" PRIu64 " MB"
                   " target_heap=%" PRIu64 " MB\n",
                   old_alloc_diff/1024, alloc_mem/1024,
                   old_nongc_time/1000, nongc_time/1000,
                   old_mut_time/1000, alloc_time/1000,
                   old_freed_diff/1024, gc_mem/1024,
                   old_pause_time/1000, gc_time/1000,
                   thrash_counter,
                   current_heap/1024/1024, target_heap/1024/1024);
}
#endif

void jl_gc_debug_init(void)
Expand Down
68 changes: 44 additions & 24 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -691,11 +691,13 @@ static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
static memsize_t max_total_memory = (memsize_t) MAX32HEAP;
#endif
// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323
static uint64_t old_pause_time = 0;
static uint64_t old_mut_time = 0;
// start with values that are in the target ranges to reduce transient hiccups at startup
static uint64_t old_pause_time = 1e7; // 10 ms
static uint64_t old_mut_time = 1e9; // 1 second
static uint64_t old_nongc_time = 1e9; // 1 second
static uint64_t old_heap_size = 0;
static uint64_t old_alloc_diff = 0;
static uint64_t old_freed_diff = 0;
static uint64_t old_alloc_diff = default_collect_interval;
static uint64_t old_freed_diff = default_collect_interval;
static uint64_t gc_end_time = 0;
static int thrash_counter = 0;
static int thrashing = 0;
Expand Down Expand Up @@ -3238,9 +3240,14 @@ JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
return live_bytes;
}

// Blend a previous estimate with a new sample: `factor` is the weight given to
// old_val and (1 - factor) the weight given to new_val.
// NOTE: this hunk shows the replaced lines (the `double` signature and the bare
// `return factor * ...`) next to their replacements. The replacement returns a
// uint64_t and clamps the estimate to the range [1, (uint64_t)2<<36].
double jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor)
uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor)
{
return factor * old_val + (1.0-factor) * new_val;
double est = factor * old_val + (1 - factor) * new_val;
if (est <= 1)
return 1; // avoid issues with <= 0
if (est > (uint64_t)2<<36)
return (uint64_t)2<<36; // avoid overflow
return est;
}

size_t jl_maxrss(void);
Expand All @@ -3257,7 +3264,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
jl_gc_markqueue_t *mq = &ptls->mark_queue;

uint64_t gc_start_time = jl_hrtime();
uint64_t mutator_time = gc_start_time - gc_end_time;
uint64_t mutator_time = gc_end_time == 0 ? old_nongc_time : gc_start_time - gc_end_time;
uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size);
int64_t last_perm_scanned_bytes = perm_scanned_bytes;
uint64_t start_mark_time = jl_hrtime();
Expand Down Expand Up @@ -3434,31 +3441,34 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)

size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size);
double target_allocs = 0.0;
double min_interval = default_collect_interval;
uint64_t min_interval = default_collect_interval;
uint64_t alloc_diff = before_free_heap_size - old_heap_size;
uint64_t freed_diff = before_free_heap_size - heap_size;
if (collection == JL_GC_AUTO) {
uint64_t alloc_diff = before_free_heap_size - old_heap_size;
uint64_t freed_diff = before_free_heap_size - heap_size;
// do not update any heuristics when the user forces GC
double alloc_smooth_factor = 0.95;
double collect_smooth_factor = 0.5;
double tuning_factor = 0.03;
double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor);
double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator
double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor);
double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor);
old_alloc_diff = alloc_diff;
old_mut_time = mutator_time;
old_freed_diff = freed_diff;
old_pause_time = pause;
old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
if (gc_time > alloc_time * 95 && !(thrash_counter < 4))
uint64_t alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor);
uint64_t nongc_time = jl_gc_smooth(old_nongc_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator
uint64_t alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); // TODO: subtract estimated finalizer time?
uint64_t gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor);
uint64_t gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor);
old_alloc_diff = alloc_mem;
old_nongc_time = nongc_time;
old_mut_time = alloc_time;
old_freed_diff = gc_mem;
old_pause_time = gc_time;
if (gc_time > alloc_time && !(thrash_counter < 4)) // thrashing if GC marking more than 50% of the runtime
thrash_counter += 1;
else if (thrash_counter > 0)
thrash_counter -= 1;
if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) {
double alloc_rate = alloc_mem/alloc_time;
double gc_rate = gc_mem/gc_time;
double alloc_rate = (double)alloc_mem/alloc_time;
double gc_rate = (double)gc_mem/gc_time;
target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval
}
old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
}
if (thrashing == 0 && thrash_counter >= 3)
thrashing = 1;
Expand Down Expand Up @@ -3519,8 +3529,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_num.max_memory = max_memory;
}
gc_final_pause_end(gc_start_time, gc_end_time);
gc_time_sweep_pause(gc_end_time, allocd, live_bytes,
estimate_freed, sweep_full);
gc_time_sweep_pause(gc_end_time, gc_num.allocd, live_bytes,
gc_num.freed, sweep_full);
gc_num.full_sweep += sweep_full;
last_live_bytes = live_bytes;
live_bytes += -gc_num.freed + gc_num.allocd;
Expand All @@ -3530,6 +3540,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
live_bytes, gc_num.interval, pause,
gc_num.time_to_safepoint,
gc_num.mark_time, gc_num.sweep_time);
if (collection == JL_GC_AUTO) {
gc_heuristics_summary(
old_alloc_diff, alloc_diff,
old_nongc_time, mutator_time + sweep_time,
old_mut_time, mutator_time,
old_freed_diff, freed_diff,
old_pause_time, pause - sweep_time,
thrash_counter,
heap_size, target_heap);
}

prev_sweep_full = sweep_full;
gc_num.pause += !recollect;
Expand Down
16 changes: 16 additions & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,14 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
uint64_t freed, uint64_t live, uint64_t interval,
uint64_t pause, uint64_t ttsp, uint64_t mark,
uint64_t sweep);
// Print a one-line summary of the GC heuristic estimates (defined in gc-debug.c).
// Each old_*/value pair is printed together, followed by the thrash counter and
// the current and target heap sizes.
void gc_heuristics_summary(
uint64_t old_alloc_diff, uint64_t alloc_mem,
uint64_t old_nongc_time, uint64_t nongc_time,
uint64_t old_mut_time, uint64_t alloc_time,
uint64_t old_freed_diff, uint64_t gc_mem,
uint64_t old_pause_time, uint64_t gc_time,
int thrash_counter,
uint64_t current_heap, uint64_t target_heap);
#else
#define gc_time_pool_start()
STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -546,6 +554,14 @@ STATIC_INLINE void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT
estimate_freed, sweep_full)
#define gc_time_summary(sweep_full, start, end, freed, live, \
interval, pause, ttsp, mark, sweep)
// No-op stand-in for gc_heuristics_summary in the #else branch: the macro
// expands to nothing, so call sites compile away and their arguments are not
// evaluated.
#define gc_heuristics_summary( \
old_alloc_diff, alloc_mem, \
old_nongc_time, nongc_time, \
old_mut_time, alloc_time, \
old_freed_diff, gc_mem, \
old_pause_time, gc_time, \
thrash_counter, \
current_heap, target_heap)
#endif

#ifdef MEMFENCE
Expand Down

0 comments on commit 96e70e6

Please sign in to comment.