diff --git a/src/gc-debug.c b/src/gc-debug.c
index 3af15c4326ab8..56afd20fe2751 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -348,8 +348,8 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
             continue;
         jl_gc_pagemeta_t *pg = page_metadata(tag);
         // Make sure the sweep rebuild the freelist
-        pg->allocd = 1;
-        pg->gc_bits = 0x3;
+        pg->has_marked = 1;
+        pg->has_young = 1;
         // Find the age bit
         char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
         int obj_id = (((char*)tag) - page_begin) / osize;
@@ -358,6 +358,7 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
         // (especially on 32bit where it's more likely to have pointer-like
         // bit patterns)
         *ages &= ~(1 << (obj_id % 8));
+        // set mark to GC_MARKED_NOESC (young and marked)
         memset(tag, 0xff, osize);
     }
 }
@@ -496,6 +497,62 @@ void gc_debug_init(void)
 #endif
 }
 
+// Simple and dumb way to count cells with different gc bits in allocated pages
+// Use as ground truth for debugging memory-leak-like issues.
+static int64_t poolobj_sizes[4];
+static int64_t empty_pages;
+
+static void gc_count_pool_page(jl_gc_pagemeta_t *pg)
+{
+    int osize = pg->osize;
+    char *data = pg->data;
+    gcval_t *v = (gcval_t*)(data + GC_PAGE_OFFSET);
+    char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
+    int has_live = 0;
+    while ((char*)v <= lim) {
+        int bits = gc_bits(v);
+        if (bits & GC_MARKED)
+            has_live = 1;
+        poolobj_sizes[bits] += osize;
+        v = (gcval_t*)((char*)v + osize);
+    }
+    if (!has_live) {
+        empty_pages++;
+    }
+}
+
+static void gc_count_pool_region(region_t *region)
+{
+    for (int pg_i = 0; pg_i < region->pg_cnt / 32; pg_i++) {
+        uint32_t line = region->allocmap[pg_i];
+        if (line) {
+            for (int j = 0; j < 32; j++) {
+                if ((line >> j) & 1) {
+                    gc_count_pool_page(&region->meta[pg_i*32 + j]);
+                }
+            }
+        }
+    }
+}
+
+void gc_count_pool(void)
+{
+    memset(&poolobj_sizes, 0, sizeof(poolobj_sizes));
+    empty_pages = 0;
+    for (int i = 0; i < REGION_COUNT; i++) {
+        if (regions[i].pages) {
+            gc_count_pool_region(&regions[i]);
+        }
+    }
+    jl_safe_printf("****** Pool stat: ******\n");
+    for (int i = 0;i < 4;i++)
+        jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]);
+    // empty_pages is inaccurate after the sweep since young objects are
+    // also GC_CLEAN
+    jl_safe_printf("free pages: %" PRId64 "\n", empty_pages);
+    jl_safe_printf("************************\n");
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc.c b/src/gc.c
index 0fcfe0a4ed301..b790e4e1eb3a6 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -264,8 +264,6 @@ static size_t max_collect_interval = 500000000UL;
 #define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
 #define NS2MS(t) ((double)(t/1000)/1000)
 
-static int64_t live_bytes = 0;
-static int64_t promoted_bytes = 0;
 JL_DLLEXPORT size_t jl_gc_total_freed_bytes=0;
 
 #ifdef GC_FINAL_STATS
@@ -322,9 +320,43 @@ static int64_t scanned_bytes; // young bytes scanned while marking
 static int64_t perm_scanned_bytes; // old bytes scanned while marking
 static int prev_sweep_mask = GC_MARKED;
 
-static size_t array_nbytes(jl_array_t*);
 #define inc_sat(v,s) v = (v) >= s ? s : (v)+1
 
+// Full collection heuristics
+static int64_t live_bytes = 0;
+static int64_t promoted_bytes = 0;
+
+static int64_t last_full_live_ub = 0;
+static int64_t last_full_live_est = 0;
+// upper bound and estimated live object sizes
+// This heuristic should be very unlikely to trigger.
+// However, it is simple enough to reliably trigger a full collection
+// when one is necessary, even if the other heuristics are messed up.
+// It is also possible to take the total memory available into account
+// if necessary.
+STATIC_INLINE int gc_check_heap_size(int64_t sz_ub, int64_t sz_est)
+{
+    if (__unlikely(!last_full_live_ub || last_full_live_ub > sz_ub)) {
+        last_full_live_ub = sz_ub;
+    }
+    else if (__unlikely(last_full_live_ub * 3 / 2 < sz_ub)) {
+        return 1;
+    }
+    if (__unlikely(!last_full_live_est || last_full_live_est > sz_est)) {
+        last_full_live_est = sz_est;
+    }
+    else if (__unlikely(last_full_live_est * 2 < sz_est)) {
+        return 1;
+    }
+    return 0;
+}
+
+STATIC_INLINE void gc_update_heap_size(int64_t sz_ub, int64_t sz_est)
+{
+    last_full_live_ub = sz_ub;
+    last_full_live_est = sz_est;
+}
+
 static inline int gc_setmark_big(void *o, int mark_mode)
 {
     if (gc_verifying) {
@@ -375,15 +407,19 @@ static inline int gc_setmark_pool(void *o, int mark_mode)
         mark_mode = GC_MARKED;
     }
     if (!(bits & GC_MARKED)) {
-        if (mark_mode == GC_MARKED)
+        if (mark_mode == GC_MARKED) {
             perm_scanned_bytes += page->osize;
-        else
+            page->nold++;
+        }
+        else {
             scanned_bytes += page->osize;
+        }
         objprofile_count(jl_typeof(jl_valueof(o)),
                          mark_mode == GC_MARKED, page->osize);
     }
+    assert(mark_mode & GC_MARKED);
+    page->has_marked = 1;
     _gc_setmark(o, mark_mode);
-    page->gc_bits |= mark_mode;
     verify_val(jl_valueof(o));
     return mark_mode;
 }
@@ -634,7 +670,6 @@ static void sweep_malloced_arrays(void)
 // pool allocation
 static inline gcval_t *reset_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t *fl)
 {
-    pg->gc_bits = 0;
     pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
     jl_tls_states_t *ptls = jl_all_tls_states[pg->thread_n];
     pg->pool_n = p - ptls->heap.norm_pools;
@@ -642,7 +677,8 @@ static inline gcval_t *reset_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t
     gcval_t *beg = (gcval_t*)(pg->data + GC_PAGE_OFFSET);
     gcval_t *end = (gcval_t*)((char*)beg + (pg->nfree - 1)*p->osize);
     end->next = fl;
-    pg->allocd = 0;
+    pg->has_young = 0;
+    pg->has_marked = 0;
     pg->fl_begin_offset = GC_PAGE_OFFSET;
     pg->fl_end_offset = (char*)end - (char*)beg + GC_PAGE_OFFSET;
     return beg;
 }
@@ -691,7 +727,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
         jl_gc_pagemeta_t *pg = page_metadata(v);
         assert(pg->osize == p->osize);
         pg->nfree = 0;
-        pg->allocd = 1;
+        pg->has_young = 1;
         if (next)
             p->nfree = page_metadata(next)->nfree;
     }
@@ -712,7 +748,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
         jl_gc_pagemeta_t *pg = page_metadata(v);
         assert(pg->osize == p->osize);
         pg->nfree = 0;
-        pg->allocd = 1;
+        pg->has_young = 1;
         p->newpages = v->next;
     }
     v->flags = 0;
@@ -840,12 +876,15 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
     freedall = 1;
     old_nfree += pg->nfree;
 
-    if (pg->gc_bits == GC_MARKED) {
-        // this page only contains GC_MARKED and free cells
-        // if we are doing a quick sweep and nothing has been allocated inside since last sweep
-        // we can skip it
-        if (sweep_mask == GC_MARKED_NOESC && !pg->allocd) {
-            // the position of the freelist begin/end in this page is stored in its metadata
+    if (!pg->has_marked)
+        goto free_page;
+    // For quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cell before marking.
+    if (sweep_mask == GC_MARKED_NOESC && !pg->has_young) {
+        assert(prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold >= pg->nold);
+        if (prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold == pg->nold) {
+            // the position of the freelist begin/end in this page
+            // is stored in its metadata
             if (pg->fl_begin_offset != (uint16_t)-1) {
                 *pfl = page_pfl_beg(pg);
                 pfl = prev_pfl = (gcval_t**)page_pfl_end(pg);
@@ -855,11 +894,11 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
             goto free_page;
         }
     }
-    else if (pg->gc_bits == GC_CLEAN) {
-        goto free_page;
-    }
 
     { // scope to avoid clang goto errors
+        int has_marked = 0;
+        int has_young = 0;
+        int16_t prev_nold = 0;
         int pg_nfree = 0;
         gcval_t **pfl_begin = NULL;
         uint8_t msk = 1; // mask for the age bit in the current age byte
@@ -873,14 +912,20 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
                 *ages &= ~msk;
             }
             else { // marked young or old
-                if (*ages & msk) { // old enough
+                if (*ages & msk || bits == GC_MARKED) { // old enough
+                    // `!age && bits == GC_MARKED` is possible for
+                    // non-first-class objects like `jl_binding_t`
                     if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) {
-                        gc_bits(v) = GC_QUEUED; // promote
+                        bits = gc_bits(v) = GC_QUEUED; // promote
                     }
+                    prev_nold++;
                 }
-                else if ((sweep_mask & bits) == sweep_mask) {
-                    gc_bits(v) = GC_CLEAN; // unmark
+                else {
+                    assert(bits == GC_MARKED_NOESC);
+                    bits = gc_bits(v) = GC_CLEAN; // unmark
+                    has_young = 1;
                 }
+                has_marked |= (bits & GC_MARKED) != 0;
                 *ages |= msk;
                 freedall = 0;
             }
@@ -892,12 +937,18 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
             }
         }
 
+        assert(!freedall);
+        pg->has_marked = has_marked;
+        pg->has_young = has_young;
         pg->fl_begin_offset = pfl_begin ? (char*)pfl_begin - data : (uint16_t)-1;
         pg->fl_end_offset = pfl_begin ? (char*)pfl - data : (uint16_t)-1;
         pg->nfree = pg_nfree;
+        if (sweep_mask == GC_MARKED) {
+            pg->nold = 0;
+            pg->prev_nold = prev_nold;
+        }
         page_done++;
-        pg->allocd = 0;
     }
 free_page:
     pg_freedall += freedall;
@@ -933,10 +984,6 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
         nfree += obj_per_page;
     }
     else {
-        if (sweep_mask == GC_MARKED)
-            pg->gc_bits = GC_CLEAN;
-        if (sweep_mask == GC_MARKED_NOESC)
-            pg->gc_bits = GC_MARKED;
         nfree += pg->nfree;
     }
 
@@ -991,7 +1038,7 @@ static int gc_sweep_inc(int sweep_mask)
             gcval_t *last = p->freelist;
             if (last) {
                 jl_gc_pagemeta_t *pg = page_metadata(last);
-                pg->allocd = 1;
+                pg->has_young = 1;
                 pg->nfree = p->nfree;
             }
             p->freelist = NULL;
@@ -1001,7 +1048,7 @@ static int gc_sweep_inc(int sweep_mask)
             if (last) {
                 jl_gc_pagemeta_t *pg = page_metadata(last);
                 pg->nfree = (GC_PAGE_SZ - ((char*)last - gc_page_data(last))) / p->osize;
-                pg->allocd = 1;
+                pg->has_young = 1;
             }
             p->newpages = NULL;
         }
@@ -1681,7 +1728,9 @@ static void _jl_gc_collect(int full, char *stack_hi)
 #if defined(GC_TIME) || defined(GC_FINAL_STATS)
         post_time = jl_hrtime() - post_time;
 #endif
-        estimate_freed = live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd;
+        int64_t live_sz_ub = live_bytes + actual_allocd;
+        int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
+        estimate_freed = live_sz_ub - live_sz_est;
 
         gc_verify();
 
@@ -1700,7 +1749,13 @@ static void _jl_gc_collect(int full, char *stack_hi)
         for (int i = 0;i < jl_n_threads;i++)
             nptr += jl_all_tls_states[i]->heap.remset_nptr;
         int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
-        if ((full || large_frontier || ((not_freed_enough || promoted_bytes >= gc_num.interval) && (promoted_bytes >= default_collect_interval || prev_sweep_mask == GC_MARKED))) && gc_num.pause > 1) {
+        if ((full || large_frontier ||
+             ((not_freed_enough || promoted_bytes >= gc_num.interval) &&
+              (promoted_bytes >= default_collect_interval ||
+               prev_sweep_mask == GC_MARKED)) ||
+             gc_check_heap_size(live_sz_ub, live_sz_est)) &&
+            gc_num.pause > 1) {
+            gc_update_heap_size(live_sz_ub, live_sz_est);
             if (prev_sweep_mask != GC_MARKED || full) {
                 if (full) recollect = 1; // TODO enable this?
             }
diff --git a/src/gc.h b/src/gc.h
index 37c91cc7c9d51..993b2f5063f3a 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -138,12 +138,33 @@ typedef struct _mallocarray_t {
 // pool page metadata
 typedef struct {
     struct {
-        uint16_t pool_n : 8; // index (into norm_pool) of pool that owns this page
-        uint16_t allocd : 1; // true if an allocation happened in this page since last sweep
-        uint16_t gc_bits : 2; // this is a bitwise | of all gc_bits in this page
+        // index of pool that owns this page
+        uint16_t pool_n : 8;
+        // Whether any cell in the page is marked
+        // This bit is set before sweeping iff there are live cells in the page.
+        // Note that before marking or after sweeping there can be live
+        // (and young) cells in the page even when `has_marked` is not set.
+        uint16_t has_marked: 1;
+        // Whether any cell was live and young **before sweeping**.
+        // For a normal sweep (quick sweep that is NOT preceded by a
+        // full sweep) this bit is set iff there are young or newly dead
+        // objects in the page and the page needs to be swept.
+        //
+        // For a full sweep, this bit should be ignored.
+        //
+        // For a quick sweep preceded by a full sweep: if this bit is set,
+        // the page needs to be swept. If this bit is not set, there could
+        // still be old dead objects in the page and `nold` and `prev_nold`
+        // should be used to determine if the page needs to be swept.
+        uint16_t has_young: 1;
     };
-    uint16_t nfree; // number of free objects in this page.
-                    // invalid if pool that owns this page is allocating objects from this page.
+    // number of old objects in this page
+    uint16_t nold;
+    // number of old objects in this page during the previous full sweep
+    uint16_t prev_nold;
+    // number of free objects in this page.
+    // invalid if pool that owns this page is allocating objects from this page.
+    uint16_t nfree;
     uint16_t osize; // size of each object in this page
     uint16_t fl_begin_offset; // offset of first free object in this page
     uint16_t fl_end_offset; // offset of last free object in this page
@@ -338,6 +359,9 @@ static inline void objprofile_reset(void)
 }
 #endif
 
+// For debugging
+void gc_count_pool(void);
+
 #ifdef __cplusplus
 }
 #endif
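
Reviewer note (not part of the patch): the new full-collection trigger in gc.c is easiest to see in isolation. The sketch below restates the gc_check_heap_size()/gc_update_heap_size() logic from the hunks above as plain standalone C with a tiny driver. The 3/2 and 2 growth factors are taken from the patch; the function names, the main() harness and the megabyte figures are made up for illustration only.

/* heap_size_sketch.c -- illustrative only, not part of the patch.
 * Mirrors gc_check_heap_size()/gc_update_heap_size() above without
 * STATIC_INLINE/__unlikely, using invented byte counts. */
#include <stdint.h>
#include <stdio.h>

static int64_t last_full_live_ub = 0;  /* live-size upper bound at the last full collection */
static int64_t last_full_live_est = 0; /* live-size estimate at the last full collection */

/* Returns 1 when the heap grew enough since the last full collection
 * (upper bound by 1.5x, or estimate by 2x) that a full sweep is warranted. */
static int check_heap_size(int64_t sz_ub, int64_t sz_est)
{
    if (!last_full_live_ub || last_full_live_ub > sz_ub)
        last_full_live_ub = sz_ub;
    else if (last_full_live_ub * 3 / 2 < sz_ub)
        return 1;
    if (!last_full_live_est || last_full_live_est > sz_est)
        last_full_live_est = sz_est;
    else if (last_full_live_est * 2 < sz_est)
        return 1;
    return 0;
}

static void update_heap_size(int64_t sz_ub, int64_t sz_est)
{
    last_full_live_ub = sz_ub;
    last_full_live_est = sz_est;
}

int main(void)
{
    const int64_t MB = 1024 * 1024;
    /* Pretend the last full collection saw ~100MB live (upper bound)
     * and ~80MB actually scanned. */
    update_heap_size(100 * MB, 80 * MB);
    /* 120MB / 90MB: below both growth factors, no full collection. */
    printf("120MB/90MB  -> %d\n", check_heap_size(120 * MB, 90 * MB));
    /* 160MB upper bound: more than 1.5x of 100MB, request a full collection. */
    printf("160MB/90MB  -> %d\n", check_heap_size(160 * MB, 90 * MB));
    /* 140MB/170MB: estimate more than 2x of 80MB, also triggers. */
    printf("140MB/170MB -> %d\n", check_heap_size(140 * MB, 170 * MB));
    return 0;
}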
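
Similarly, the way the new page flags drive the "skip this page" decision in sweep_page() can be summarized as a standalone predicate. Everything below is a simplified restatement: pagemeta_sketch_t stands in for jl_gc_pagemeta_t, the enums stand in for the sweep masks, and the numbers in main() are invented.

/* sweep_skip_sketch.c -- illustrative only; a stand-alone restatement of
 * the page-skipping control flow added to sweep_page() above. */
#include <stdint.h>
#include <stdio.h>

enum { QUICK_SWEEP = 0, FULL_SWEEP = 1 };        /* stand-ins for the sweep masks */
enum { PAGE_FREE, PAGE_SKIP, PAGE_SWEEP };

typedef struct {
    uint16_t has_marked: 1; /* any live cell on the page before this sweep */
    uint16_t has_young: 1;  /* any young live (or newly dead) cell before this sweep */
    uint16_t nold;          /* old objects counted while marking */
    uint16_t prev_nold;     /* old objects recorded at the previous full sweep */
} pagemeta_sketch_t;

/* prev_sweep_was_quick mirrors `prev_sweep_mask == GC_MARKED_NOESC` above. */
static int page_sweep_action(const pagemeta_sketch_t *pg, int sweep_kind,
                             int prev_sweep_was_quick)
{
    if (!pg->has_marked)
        return PAGE_FREE; /* nothing live: hand the whole page back */
    if (sweep_kind == QUICK_SWEEP && !pg->has_young) {
        /* No young cells. After a quick sweep we can always skip; after a
         * full sweep we can only skip if the old-object count is unchanged,
         * otherwise old dead objects may still need to be reclaimed. */
        if (prev_sweep_was_quick || pg->prev_nold == pg->nold)
            return PAGE_SKIP; /* reuse the freelist recorded in the metadata */
    }
    return PAGE_SWEEP;
}

int main(void)
{
    pagemeta_sketch_t pg = {.has_marked = 1, .has_young = 0,
                            .nold = 10, .prev_nold = 12};
    /* Quick sweep after a full sweep: only 10 of the 12 old objects recorded
     * at the full sweep were marked again, so the page must still be swept. */
    printf("%d\n", page_sweep_action(&pg, QUICK_SWEEP, 0)); /* PAGE_SWEEP */
    pg.prev_nold = 10;
    printf("%d\n", page_sweep_action(&pg, QUICK_SWEEP, 0)); /* PAGE_SKIP */
    return 0;
}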
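
Finally, a toy restatement of what the new gc_count_pool() ground-truth counter reports per page: bytes bucketed by each cell's 2-bit gc state, plus whether the page has any live (marked) cell at all. The fake page contents, the cell size, and the assumption that GC_MARKED is bit 0 of the state are illustrative; the real walk is gc_count_pool_page() in the patch.

/* count_pool_sketch.c -- illustrative only. */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define CELLS 8
#define OSIZE 64 /* bytes per cell, like jl_gc_pagemeta_t::osize */

int main(void)
{
    /* made-up 2-bit gc states for one page worth of cells */
    int bits[CELLS] = {0, 3, 0, 1, 2, 3, 0, 0};
    int64_t poolobj_sizes[4] = {0, 0, 0, 0};
    int has_live = 0;
    for (int i = 0; i < CELLS; i++) {
        if (bits[i] & 1) /* assumed GC_MARKED bit: cell is live */
            has_live = 1;
        poolobj_sizes[bits[i]] += OSIZE;
    }
    for (int i = 0; i < 4; i++)
        printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]);
    printf("empty page: %s\n", has_live ? "no" : "yes");
    return 0;
}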