From 38eb51675bc62961abdb191f17ae76e14f81748a Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 4 Jun 2019 18:46:19 -0400 Subject: [PATCH] make GC counters thread-local fixes #27173 --- src/gc.c | 120 ++++++++++++++++++++++++++++++-------------- src/julia_threads.h | 11 ++++ 2 files changed, 94 insertions(+), 37 deletions(-) diff --git a/src/gc.c b/src/gc.c index 0fe4b1fdc460a..6f2cfbdef9f4f 100644 --- a/src/gc.c +++ b/src/gc.c @@ -491,6 +491,16 @@ static size_t max_collect_interval = 1250000000UL; static size_t max_collect_interval = 500000000UL; #endif +static int64_t per_thread_counter_interval(int tid, size_t interval) +{ + if (jl_n_threads == 1) + return interval; + // stagger global counter updates over threads, starting at some minimum (1MB), + // in varying amounts that add to `interval`. + size_t min = 1048576; + return tid * 2 * (interval/jl_n_threads - min) / (jl_n_threads - 1) + min; +} + // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -802,11 +812,17 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT jl_gc_queue_root(v); } -#define should_collect() (__unlikely(gc_num.allocd>0)) - static inline int maybe_collect(jl_ptls_t ptls) { - if (should_collect() || gc_debug_check_other()) { + int should_collect = 0; + if (ptls->gc_num.allocd >= 0) { + int64_t intvl = per_thread_counter_interval(ptls->tid, gc_num.interval); + size_t localbytes = ptls->gc_num.allocd + intvl; + ptls->gc_num.allocd = -intvl; + jl_atomic_fetch_add(&gc_num.allocd, localbytes); + should_collect = (gc_num.allocd > 0); + } + if (should_collect || gc_debug_check_other()) { jl_gc_collect(0); return 1; } @@ -876,12 +892,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); -#ifdef JULIA_ENABLE_THREADING - jl_atomic_fetch_add(&gc_num.allocd, allocsz); -#else - gc_num.allocd += allocsz; -#endif - gc_num.bigalloc++; + ptls->gc_num.allocd += allocsz; + ptls->gc_num.bigalloc++; #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif @@ -973,14 +985,44 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { - gc_num.allocd += sz; + jl_ptls_t ptls = jl_get_ptls_states(); + ptls->gc_num.allocd += sz; +} + +static void combine_thread_gc_counts(jl_gc_num_t *dest) +{ + for (int i = 0; i < jl_n_threads; i++) { + jl_ptls_t ptls = jl_all_tls_states[i]; + if (ptls) { + dest->allocd += (ptls->gc_num.allocd + per_thread_counter_interval(ptls->tid, gc_num.interval)); + dest->freed += ptls->gc_num.freed; + dest->malloc += ptls->gc_num.malloc; + dest->realloc += ptls->gc_num.realloc; + dest->poolalloc += ptls->gc_num.poolalloc; + dest->bigalloc += ptls->gc_num.bigalloc; + dest->freecall += ptls->gc_num.freecall; + } + } +} + +static void reset_thread_gc_counts(void) +{ + for (int i = 0; i < jl_n_threads; i++) { + jl_ptls_t ptls = jl_all_tls_states[i]; + if (ptls) { + memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t)); + ptls->gc_num.allocd = -per_thread_counter_interval(ptls->tid, gc_num.interval); + } + } } void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { + combine_thread_gc_counts(&gc_num); live_bytes += (gc_num.deferred_alloc + (gc_num.allocd + gc_num.interval)); gc_num.allocd = -(int64_t)gc_num.interval; gc_num.deferred_alloc = 0; + reset_thread_gc_counts(); } static size_t array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT @@ -1098,16 +1140,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, #ifdef MEMDEBUG return jl_gc_big_alloc(ptls, osize); #endif - // FIXME - need JL_ATOMIC_FETCH_AND_ADD here - if (__unlikely((gc_num.allocd += osize) >= 0) || gc_debug_check_pool()) { - //gc_num.allocd -= osize; - jl_gc_collect(0); - //gc_num.allocd += osize; - } - else { - jl_gc_safepoint_(ptls); - } - gc_num.poolalloc++; + maybe_collect(ptls); + ptls->gc_num.allocd += osize; + ptls->gc_num.poolalloc++; // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v) { @@ -2603,9 +2638,11 @@ JL_DLLEXPORT int jl_gc_is_enabled(void) JL_DLLEXPORT int64_t jl_gc_total_bytes(void) { + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num); // Sync this logic with `base/util.jl:GC_Diff` - return (gc_num.total_allocd + gc_num.deferred_alloc + - gc_num.allocd + gc_num.interval); + return (num.total_allocd + num.deferred_alloc + + num.allocd + num.interval); } JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) { @@ -2613,7 +2650,9 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) } JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { - return gc_num; + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num); + return num; } JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) @@ -2687,6 +2726,8 @@ static void jl_gc_queue_bt_buf(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, int full) { + combine_thread_gc_counts(&gc_num); + jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; jl_gc_mark_sp_t sp; gc_mark_sp_init(gc_cache, &sp); @@ -2853,6 +2894,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full) gc_num.total_time += pause; gc_num.since_sweep = 0; gc_num.freed = 0; + reset_thread_gc_counts(); return recollect; } @@ -2962,6 +3004,9 @@ void jl_init_thread_heap(jl_ptls_t ptls) gc_cache->pc_stack = (void**)malloc(init_size * sizeof(void*)); gc_cache->pc_stack_end = gc_cache->pc_stack + init_size; gc_cache->data_stack = (jl_gc_mark_data_t *)malloc(init_size * sizeof(jl_gc_mark_data_t)); + + memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t)); + ptls->gc_num.allocd = -per_thread_counter_interval(ptls->tid, default_collect_interval); } // System-wide initializations @@ -2999,8 +3044,8 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { jl_ptls_t ptls = jl_get_ptls_states(); maybe_collect(ptls); - gc_num.allocd += sz; - gc_num.malloc++; + ptls->gc_num.allocd += sz; + ptls->gc_num.malloc++; void *b = malloc(sz); if (b == NULL) jl_throw(jl_memory_exception); @@ -3011,8 +3056,8 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) { jl_ptls_t ptls = jl_get_ptls_states(); maybe_collect(ptls); - gc_num.allocd += nm*sz; - gc_num.malloc++; + ptls->gc_num.allocd += nm*sz; + ptls->gc_num.malloc++; void *b = calloc(nm, sz); if (b == NULL) jl_throw(jl_memory_exception); @@ -3021,9 +3066,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { + jl_ptls_t ptls = jl_get_ptls_states(); free(p); - gc_num.freed += sz; - gc_num.freecall++; + ptls->gc_num.freed += sz; + ptls->gc_num.freecall++; } // older name for jl_gc_counted_free_with_size @@ -3037,10 +3083,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = jl_get_ptls_states(); maybe_collect(ptls); if (sz < old) - gc_num.freed += (old - sz); + ptls->gc_num.freed += (old - sz); else - gc_num.allocd += (sz - old); - gc_num.realloc++; + ptls->gc_num.allocd += (sz - old); + ptls->gc_num.realloc++; void *b = realloc(p, sz); if (b == NULL) jl_throw(jl_memory_exception); @@ -3100,8 +3146,8 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - gc_num.allocd += allocsz; - gc_num.malloc++; + ptls->gc_num.allocd += allocsz; + ptls->gc_num.malloc++; void *b = malloc_cache_align(allocsz); if (b == NULL) jl_throw(jl_memory_exception); @@ -3123,10 +3169,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds live_bytes += allocsz - oldsz; } else if (allocsz < oldsz) - gc_num.freed += (oldsz - allocsz); + ptls->gc_num.freed += (oldsz - allocsz); else - gc_num.allocd += (allocsz - oldsz); - gc_num.realloc++; + ptls->gc_num.allocd += (allocsz - oldsz); + ptls->gc_num.realloc++; void *b; if (isaligned) diff --git a/src/julia_threads.h b/src/julia_threads.h index a626da27c7d65..80ab79b153bd8 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -65,6 +65,16 @@ typedef struct { uint16_t osize; // size of objects in this pool } jl_gc_pool_t; +typedef struct { + int64_t allocd; + int64_t freed; + uint64_t malloc; + uint64_t realloc; + uint64_t poolalloc; + uint64_t bigalloc; + uint64_t freecall; +} jl_thread_gc_num_t; + typedef struct { // variable for tracking weak references arraylist_t weak_refs; @@ -155,6 +165,7 @@ struct _jl_tls_states_t { volatile int8_t in_finalizer; int8_t disable_gc; jl_thread_heap_t heap; + jl_thread_gc_num_t gc_num; uv_mutex_t sleep_lock; uv_cond_t wake_signal; volatile sig_atomic_t defer_signal;