Rewrite GC page metadata for sweeping fast path #16632

Merged: 4 commits, Jun 3, 2016
Changes from all commits
61 changes: 59 additions & 2 deletions src/gc-debug.c
@@ -348,8 +348,8 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
continue;
jl_gc_pagemeta_t *pg = page_metadata(tag);
// Make sure the sweep rebuilds the freelist
- pg->allocd = 1;
- pg->gc_bits = 0x3;
+ pg->has_marked = 1;
+ pg->has_young = 1;
// Find the age bit
char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
int obj_id = (((char*)tag) - page_begin) / osize;
@@ -358,6 +358,7 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
// (especially on 32bit where it's more likely to have pointer-like
// bit patterns)
*ages &= ~(1 << (obj_id % 8));
// set mark to GC_MARKED_NOESC (young and marked)
memset(tag, 0xff, osize);
}
}
@@ -496,6 +497,62 @@ void gc_debug_init(void)
#endif
}

// Simple and dumb way to count cells with different gc bits in allocated pages
// Use as ground truth for debugging memory-leak-like issues.
static int64_t poolobj_sizes[4];
static int64_t empty_pages;

static void gc_count_pool_page(jl_gc_pagemeta_t *pg)
{
int osize = pg->osize;
char *data = pg->data;
gcval_t *v = (gcval_t*)(data + GC_PAGE_OFFSET);
char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
int has_live = 0;
while ((char*)v <= lim) {
int bits = gc_bits(v);
if (bits & GC_MARKED)
has_live = 1;
poolobj_sizes[bits] += osize;
v = (gcval_t*)((char*)v + osize);
}
if (!has_live) {
empty_pages++;
}
}

static void gc_count_pool_region(region_t *region)
{
for (int pg_i = 0; pg_i < region->pg_cnt / 32; pg_i++) {
uint32_t line = region->allocmap[pg_i];
if (line) {
for (int j = 0; j < 32; j++) {
if ((line >> j) & 1) {
gc_count_pool_page(&region->meta[pg_i*32 + j]);
}
}
}
}
}

void gc_count_pool(void)
{
memset(&poolobj_sizes, 0, sizeof(poolobj_sizes));
empty_pages = 0;
for (int i = 0; i < REGION_COUNT; i++) {
if (regions[i].pages) {
gc_count_pool_region(&regions[i]);
}
}
jl_safe_printf("****** Pool stat: ******\n");
for (int i = 0;i < 4;i++)
jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]);
// empty_pages is inaccurate after the sweep since young objects are
// also GC_CLEAN
jl_safe_printf("free pages: % " PRId64 "\n", empty_pages);
jl_safe_printf("************************\n");
}

#ifdef __cplusplus
}
#endif
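
As a usage note (an assumption about the intended workflow, since the PR only adds the function): gc_count_pool() can be called on demand, e.g. from gdb via `call gc_count_pool()`, to get the ground-truth pool statistics mentioned in the comment above. With hypothetical counts, the output produced by the jl_safe_printf calls looks like:

****** Pool stat: ******
bits(0): 6291456
bits(1): 1048576
bits(2): 524288
bits(3): 2097152
free pages:  42
************************

Each bits(i) line is the total bytes of pool cells whose gc_bits equal i (GC_CLEAN, GC_MARKED, GC_QUEUED, GC_MARKED_NOESC); the double space after "free pages:" comes from the "% " space flag in the format string.
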
119 changes: 87 additions & 32 deletions src/gc.c
@@ -264,8 +264,6 @@ static size_t max_collect_interval = 500000000UL;

#define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
#define NS2MS(t) ((double)(t/1000)/1000)
- static int64_t live_bytes = 0;
- static int64_t promoted_bytes = 0;

JL_DLLEXPORT size_t jl_gc_total_freed_bytes=0;
#ifdef GC_FINAL_STATS
@@ -322,9 +320,43 @@ static int64_t scanned_bytes; // young bytes scanned while marking
static int64_t perm_scanned_bytes; // old bytes scanned while marking
static int prev_sweep_mask = GC_MARKED;

static size_t array_nbytes(jl_array_t*);
#define inc_sat(v,s) v = (v) >= s ? s : (v)+1

// Full collection heuristics
static int64_t live_bytes = 0;
static int64_t promoted_bytes = 0;

// upper bound and estimated live object sizes
static int64_t last_full_live_ub = 0;
static int64_t last_full_live_est = 0;
// This heuristic should very rarely trigger. However, it is simple
// enough to reliably force a full collection when necessary if the
// other heuristics are messed up.
// The total available memory could also be taken into account
// if necessary.
STATIC_INLINE int gc_check_heap_size(int64_t sz_ub, int64_t sz_est)
{
if (__unlikely(!last_full_live_ub || last_full_live_ub > sz_ub)) {
last_full_live_ub = sz_ub;
}
else if (__unlikely(last_full_live_ub * 3 / 2 < sz_ub)) {
return 1;
}
if (__unlikely(!last_full_live_est || last_full_live_est > sz_est)) {
last_full_live_est = sz_est;
}
else if (__unlikely(last_full_live_est * 2 < sz_est)) {
return 1;
}
return 0;
}

STATIC_INLINE void gc_update_heap_size(int64_t sz_ub, int64_t sz_est)
{
last_full_live_ub = sz_ub;
last_full_live_est = sz_est;
}

static inline int gc_setmark_big(void *o, int mark_mode)
{
if (gc_verifying) {
@@ -375,15 +407,19 @@ static inline int gc_setmark_pool(void *o, int mark_mode)
mark_mode = GC_MARKED;
}
if (!(bits & GC_MARKED)) {
- if (mark_mode == GC_MARKED)
+ if (mark_mode == GC_MARKED) {
perm_scanned_bytes += page->osize;
- else
+ page->nold++;
+ }
+ else {
scanned_bytes += page->osize;
+ }
objprofile_count(jl_typeof(jl_valueof(o)),
mark_mode == GC_MARKED, page->osize);
}
+ assert(mark_mode & GC_MARKED);
+ page->has_marked = 1;
_gc_setmark(o, mark_mode);
- page->gc_bits |= mark_mode;
verify_val(jl_valueof(o));
return mark_mode;
}
@@ -634,15 +670,15 @@ static void sweep_malloced_arrays(void)
// pool allocation
static inline gcval_t *reset_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t *fl)
{
- pg->gc_bits = 0;
pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
jl_tls_states_t *ptls = jl_all_tls_states[pg->thread_n];
pg->pool_n = p - ptls->heap.norm_pools;
memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1);
gcval_t *beg = (gcval_t*)(pg->data + GC_PAGE_OFFSET);
gcval_t *end = (gcval_t*)((char*)beg + (pg->nfree - 1)*p->osize);
end->next = fl;
- pg->allocd = 0;
+ pg->has_young = 0;
+ pg->has_marked = 0;
pg->fl_begin_offset = GC_PAGE_OFFSET;
pg->fl_end_offset = (char*)end - (char*)beg + GC_PAGE_OFFSET;
return beg;
@@ -691,7 +727,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
jl_gc_pagemeta_t *pg = page_metadata(v);
assert(pg->osize == p->osize);
pg->nfree = 0;
- pg->allocd = 1;
+ pg->has_young = 1;
if (next)
p->nfree = page_metadata(next)->nfree;
}
@@ -712,7 +748,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
jl_gc_pagemeta_t *pg = page_metadata(v);
assert(pg->osize == p->osize);
pg->nfree = 0;
- pg->allocd = 1;
+ pg->has_young = 1;
p->newpages = v->next;
}
v->flags = 0;
@@ -840,12 +876,15 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
freedall = 1;
old_nfree += pg->nfree;

- if (pg->gc_bits == GC_MARKED) {
- // this page only contains GC_MARKED and free cells
- // if we are doing a quick sweep and nothing has been allocated inside since last sweep
- // we can skip it
- if (sweep_mask == GC_MARKED_NOESC && !pg->allocd) {
- // the position of the freelist begin/end in this page is stored in its metadata
+ if (!pg->has_marked)
+ goto free_page;
+ // For a quick sweep, we might be able to skip the page if it doesn't
+ // have any young live cells before marking.
+ if (sweep_mask == GC_MARKED_NOESC && !pg->has_young) {
+ assert(prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold >= pg->nold);
+ if (prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold == pg->nold) {
+ // the position of the freelist begin/end in this page
+ // is stored in its metadata
if (pg->fl_begin_offset != (uint16_t)-1) {
*pfl = page_pfl_beg(pg);
pfl = prev_pfl = (gcval_t**)page_pfl_end(pg);
@@ -855,11 +894,11 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
goto free_page;
}
}
- else if (pg->gc_bits == GC_CLEAN) {
- goto free_page;
- }

{ // scope to avoid clang goto errors
+ int has_marked = 0;
+ int has_young = 0;
+ int16_t prev_nold = 0;
int pg_nfree = 0;
gcval_t **pfl_begin = NULL;
uint8_t msk = 1; // mask for the age bit in the current age byte
@@ -873,14 +912,20 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
*ages &= ~msk;
}
else { // marked young or old
- if (*ages & msk) { // old enough
+ if (*ages & msk || bits == GC_MARKED) { // old enough
+ // `!age && bits == GC_MARKED` is possible for
+ // non-first-class objects like `jl_binding_t`
if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) {
- gc_bits(v) = GC_QUEUED; // promote
+ bits = gc_bits(v) = GC_QUEUED; // promote
}
+ prev_nold++;
}
- else if ((sweep_mask & bits) == sweep_mask) {
- gc_bits(v) = GC_CLEAN; // unmark
+ else {
+ assert(bits == GC_MARKED_NOESC);
+ bits = gc_bits(v) = GC_CLEAN; // unmark
+ has_young = 1;
}
+ has_marked |= (bits & GC_MARKED) != 0;
*ages |= msk;
freedall = 0;
}
Expand All @@ -892,12 +937,18 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
}
}

assert(!freedall);
+ pg->has_marked = has_marked;
+ pg->has_young = has_young;
pg->fl_begin_offset = pfl_begin ? (char*)pfl_begin - data : (uint16_t)-1;
pg->fl_end_offset = pfl_begin ? (char*)pfl - data : (uint16_t)-1;

pg->nfree = pg_nfree;
+ if (sweep_mask == GC_MARKED) {
+ pg->nold = 0;
+ pg->prev_nold = prev_nold;
+ }
page_done++;
- pg->allocd = 0;
}
free_page:
pg_freedall += freedall;
@@ -933,10 +984,6 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
nfree += obj_per_page;
}
else {
- if (sweep_mask == GC_MARKED)
- pg->gc_bits = GC_CLEAN;
- if (sweep_mask == GC_MARKED_NOESC)
- pg->gc_bits = GC_MARKED;
nfree += pg->nfree;
}

@@ -991,7 +1038,7 @@ static int gc_sweep_inc(int sweep_mask)
gcval_t *last = p->freelist;
if (last) {
jl_gc_pagemeta_t *pg = page_metadata(last);
- pg->allocd = 1;
+ pg->has_young = 1;
pg->nfree = p->nfree;
}
p->freelist = NULL;
@@ -1001,7 +1048,7 @@ static int gc_sweep_inc(int sweep_mask)
if (last) {
jl_gc_pagemeta_t *pg = page_metadata(last);
pg->nfree = (GC_PAGE_SZ - ((char*)last - gc_page_data(last))) / p->osize;
- pg->allocd = 1;
+ pg->has_young = 1;
}
p->newpages = NULL;
}
@@ -1681,7 +1728,9 @@ static void _jl_gc_collect(int full, char *stack_hi)
#if defined(GC_TIME) || defined(GC_FINAL_STATS)
post_time = jl_hrtime() - post_time;
#endif
- estimate_freed = live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd;
+ int64_t live_sz_ub = live_bytes + actual_allocd;
+ int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
+ estimate_freed = live_sz_ub - live_sz_est;

gc_verify();
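
A worked example of the new estimate (hypothetical sizes, not from this PR): if live_bytes ended the previous cycle at 200 MB, 50 MB have been allocated since then (actual_allocd), and the mark phase scanned 180 MB in total (scanned_bytes + perm_scanned_bytes), then live_sz_ub = 200 + 50 = 250 MB, live_sz_est = 180 MB, and estimate_freed = 250 - 180 = 70 MB.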

@@ -1700,7 +1749,13 @@ static void _jl_gc_collect(int full, char *stack_hi)
for (int i = 0;i < jl_n_threads;i++)
nptr += jl_all_tls_states[i]->heap.remset_nptr;
int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
- if ((full || large_frontier || ((not_freed_enough || promoted_bytes >= gc_num.interval) && (promoted_bytes >= default_collect_interval || prev_sweep_mask == GC_MARKED))) && gc_num.pause > 1) {
+ if ((full || large_frontier ||
+ ((not_freed_enough || promoted_bytes >= gc_num.interval) &&
+ (promoted_bytes >= default_collect_interval ||
+ prev_sweep_mask == GC_MARKED)) ||
+ gc_check_heap_size(live_sz_ub, live_sz_est)) &&
+ gc_num.pause > 1) {
+ gc_update_heap_size(live_sz_ub, live_sz_est);
if (prev_sweep_mask != GC_MARKED || full) {
if (full) recollect = 1; // TODO enable this?
}
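To make the full-collection heuristic concrete, here is a small standalone sketch (not part of the PR; the harness, shortened names, and numbers are all hypothetical) that mirrors the gc_check_heap_size logic above and shows when a full collection would be forced:

#include <stdint.h>
#include <stdio.h>

static int64_t last_ub = 0, last_est = 0;

// Mirrors gc_check_heap_size: returns 1 when a full collection is forced.
static int check_heap_size(int64_t sz_ub, int64_t sz_est)
{
    if (!last_ub || last_ub > sz_ub)
        last_ub = sz_ub;           // heap shrank: lower the baseline
    else if (last_ub * 3 / 2 < sz_ub)
        return 1;                  // upper bound grew by more than 50%
    if (!last_est || last_est > sz_est)
        last_est = sz_est;
    else if (last_est * 2 < sz_est)
        return 1;                  // estimate more than doubled
    return 0;
}

int main(void)
{
    const int64_t MB = 1 << 20;
    check_heap_size(100 * MB, 80 * MB);                  // records the baselines
    printf("%d\n", check_heap_size(140 * MB, 120 * MB)); // 0: below both limits
    printf("%d\n", check_heap_size(160 * MB, 120 * MB)); // 1: 160 MB > 150 MB (= 100 * 3/2)
    return 0;
}

After such a forced full collection, gc_update_heap_size resets both baselines to the sizes just measured, so steady growth between full collections is tolerated while runaway growth is not.
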
34 changes: 29 additions & 5 deletions src/gc.h
@@ -138,12 +138,33 @@ typedef struct _mallocarray_t {
// pool page metadata
typedef struct {
struct {
- uint16_t pool_n : 8; // index (into norm_pool) of pool that owns this page
- uint16_t allocd : 1; // true if an allocation happened in this page since last sweep
- uint16_t gc_bits : 2; // this is a bitwise | of all gc_bits in this page
+ // index of pool that owns this page
+ uint16_t pool_n : 8;
+ // Whether any cell in the page is marked.
+ // This bit is set before sweeping iff there are live cells in the page.
+ // Note that before marking or after sweeping there can be live
+ // (and young) cells in the page even when `has_marked` is unset.
+ uint16_t has_marked: 1;
+ // Whether any cell was live and young **before sweeping**.
+ // For a normal sweep (a quick sweep that is NOT preceded by a
+ // full sweep) this bit is set iff there are young or newly dead
+ // objects in the page and the page needs to be swept.
+ //
+ // For a full sweep, this bit should be ignored.
+ //
+ // For a quick sweep preceded by a full sweep: if this bit is set,
+ // the page needs to be swept. If it is not set, there could
+ // still be old dead objects in the page, and `nold` and `prev_nold`
+ // should be used to determine whether the page needs to be swept.
+ uint16_t has_young: 1;
};
- uint16_t nfree; // number of free objects in this page.
- // invalid if pool that owns this page is allocating objects from this page.
+ // number of old objects in this page
+ uint16_t nold;
+ // number of old objects in this page during the previous full sweep
+ uint16_t prev_nold;
+ // number of free objects in this page.
+ // invalid if pool that owns this page is allocating objects from this page.
+ uint16_t nfree;
uint16_t osize; // size of each object in this page
uint16_t fl_begin_offset; // offset of first free object in this page
uint16_t fl_end_offset; // offset of last free object in this page
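
Taken together, the new fields give the sweep its fast path. A condensed paraphrase follows (not verbatim from the PR; it assumes gc.h's definitions, and free_page_to_os, splice_stored_freelist, and scan_all_cells are hypothetical names standing in for the real code paths in sweep_page above):

// Hypothetical helpers standing in for the real code paths in sweep_page:
void free_page_to_os(jl_gc_pagemeta_t *pg);
void splice_stored_freelist(jl_gc_pagemeta_t *pg);
void scan_all_cells(jl_gc_pagemeta_t *pg);

static void sweep_page_sketch(jl_gc_pagemeta_t *pg, int sweep_mask,
                              int prev_sweep_mask)
{
    if (!pg->has_marked) {
        // no cell on the page survived marking: free the page wholesale
        free_page_to_os(pg);
    }
    else if (sweep_mask == GC_MARKED_NOESC && !pg->has_young &&
             (prev_sweep_mask == GC_MARKED_NOESC ||
              pg->prev_nold == pg->nold)) {
        // quick-sweep fast path: no young or newly dead cells, so the
        // freelist recorded at fl_begin_offset/fl_end_offset is still
        // valid and the per-cell scan can be skipped entirely
        splice_stored_freelist(pg);
    }
    else {
        // slow path: walk every cell, rebuild the freelist, and recompute
        // has_marked, has_young, and (on a full sweep) nold/prev_nold
        scan_all_cells(pg);
    }
}
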
@@ -338,6 +359,9 @@ static inline void objprofile_reset(void)
}
#endif

// For debugging
void gc_count_pool(void);

#ifdef __cplusplus
}
#endif