From a4b8ce44c0fc06391c8dfc79fbc608d8ef8aeaa8 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Mon, 9 Jan 2017 13:34:42 +0800 Subject: [PATCH] Separate marking and scanning of the object Make sure in `gc_mark_obj` that the object will only be scanned once. --- src/gc.c | 182 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 101 insertions(+), 81 deletions(-) diff --git a/src/gc.c b/src/gc.c index d4739f71e9f493..f04a82c304ab0d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -447,12 +447,11 @@ STATIC_INLINE void gc_update_heap_size(int64_t sz_ub, int64_t sz_est) static inline uint16_t gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, int8_t mark_mode, uintptr_t tag) { + assert(!gc_marked(tag)); if (gc_verifying) { o->bits.gc = mark_mode; return mark_mode; } - if (gc_marked(tag)) - return (uint16_t)tag & 3; assert(find_region(o) == NULL); bigval_t *hdr = bigval_header(o); if (mark_reset_age) { @@ -490,6 +489,7 @@ static inline uint16_t gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, int8_t mark_mode, region_t *r, uintptr_t tag) { + assert(!gc_marked(tag)); #ifdef MEMDEBUG return gc_setmark_big(ptls, o, mark_mode, tag); #endif @@ -498,8 +498,6 @@ static inline uint16_t gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, o->bits.gc = mark_mode; return mark_mode; } - if (gc_marked(tag)) - return (uint16_t)tag & 3; jl_gc_pagemeta_t *page = page_metadata_(o, r); if (mark_reset_age) { // Reset the object as if it was just allocated @@ -536,12 +534,14 @@ static inline uint16_t gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, static inline uint16_t gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o, int8_t mark_mode, uintptr_t tag) { + assert(!gc_marked(tag)); return gc_setmark_pool_(ptls, o, mark_mode, find_region(o), tag); } static inline uint16_t gc_setmark(jl_ptls_t ptls, jl_value_t *v, int sz, uintptr_t tag) { + assert(!gc_marked(tag)); jl_taggedvalue_t *o = jl_astaggedvalue(v); if (sz <= GC_MAX_SZCLASS) return gc_setmark_pool(ptls, o, 
GC_MARKED, tag); @@ -554,6 +554,8 @@ inline void gc_setmark_buf(jl_ptls_t ptls, void *o, { jl_taggedvalue_t *buf = jl_astaggedvalue(o); uintptr_t tag = buf->header; + if (gc_marked(tag)) + return; // If the object is larger than the max pool size it can't be a pool object. // This should be accurate most of the time but there might be corner cases // where the size estimate is a little off so we do a pool lookup to make @@ -1190,7 +1192,8 @@ void gc_queue_binding(jl_binding_t *bnd) arraylist_push(&ptls->heap.rem_bindings, bnd); } -static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag); +static void gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag); +static uint16_t gc_mark_obj(jl_ptls_t ptls, jl_value_t *v, uintptr_t tag); #ifdef JL_DEBUG_BUILD static void *volatile gc_findval; // for usage from gdb, for finding the gc-root for a value #endif @@ -1204,9 +1207,15 @@ static inline int gc_push_root(jl_ptls_t ptls, void *v, int d) // v isa jl_value assert(v != NULL); jl_taggedvalue_t *o = jl_astaggedvalue(v); verify_val(v); - uintptr_t tag = o->header; - if (!gc_marked(tag)) - return !gc_old(gc_scan_obj(ptls, (jl_value_t*)v, d, tag)); + const uintptr_t tag = o->header; + if (!gc_marked(tag)) { + uint16_t mark_res = gc_mark_obj(ptls, (jl_value_t*)v, tag); + assert(gc_marked(o->header)); + if (mark_res >> 8) + gc_scan_obj(ptls, (jl_value_t*)v, d, + gc_set_bits(tag, mark_res & 0xff)); + return !gc_old(mark_res); + } return !gc_old(tag); } @@ -1387,43 +1396,37 @@ JL_DLLEXPORT void jl_gc_lookfor(jl_value_t *v) { lookforme = v; } */ #define MAX_MARK_DEPTH 400 -// mark v and recurse on its children (or store them on the mark stack when recursion depth becomes too high) -// it does so assuming the gc bits of v are "bits" and returns the new bits of v -// if v becomes GC_OLD_MARKED and some of its children are GC_MARKED (young), -// v is added to the remset -static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) +// Scan 
a marked object `v` and recursively mark its children. +// The object will be queued on the mark stack when recursion depth +// becomes too high. +// It does so assuming that the tag of the (marked) object is `tag`. +// If `v` is `GC_OLD_MARKED` and some of its children are `GC_MARKED` (young), +// `v` is added to the remset +static void gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) { assert(v != NULL); + assert(gc_marked(tag)); jl_datatype_t *vt = (jl_datatype_t*)(tag & ~(uintptr_t)15); - gc_assert_datatype(vt); +#ifdef JL_DEBUG_BUILD + gc_assert_datatype(vt); // should have checked in `gc_mark_obj` +#endif int refyoung = 0, nptr = 0; - // Do not initialize `bits` to catch branches forgetting to set `bits` - // using compiler warnings. - int bits; + const int8_t bits = tag & 0xf; - if (vt == jl_weakref_type) { - bits = gc_setmark(ptls, v, sizeof(jl_weakref_t), tag) & 0xff; - goto ret; - } - else if (vt == jl_string_type) { - bits = gc_setmark(ptls, v, jl_string_len(v) + sizeof(size_t) + 1, - tag) & 0xff; - goto ret; - } - if (vt->layout->pointerfree) { - int sz = jl_datatype_size(vt); - bits = gc_setmark(ptls, v, sz, tag) & 0xff; - goto ret; - } + assert(vt != jl_symbol_type); + // weakref should not be marked + if (vt == jl_weakref_type) + return; + // fast path + if (vt->layout->pointerfree) + return; d++; + if (d >= MAX_MARK_DEPTH) + goto queue_the_root; // some values have special representations if (vt == jl_simplevector_type) { size_t l = jl_svec_len(v); - bits = gc_setmark(ptls, v, l * sizeof(void*) + sizeof(jl_svec_t), - tag) & 0xff; - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; jl_value_t **data = jl_svec_data(v); nptr += l; for(size_t i=0; i < l; i++) { @@ -1436,25 +1439,7 @@ static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) } else if (vt->name == jl_array_typename) { jl_array_t *a = (jl_array_t*)v; - jl_taggedvalue_t *o = jl_astaggedvalue(v); jl_array_flags_t flags = a->flags; - uint16_t mark_res = 
(flags.pooled ? - gc_setmark_pool(ptls, o, GC_MARKED, tag) : - gc_setmark_big(ptls, o, GC_MARKED, tag)); - bits = mark_res & 0xff; - uint8_t tag_changed = mark_res >> 8; - if (flags.how == 2 && tag_changed) { - objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, - array_nbytes(a)); - if (bits == GC_OLD_MARKED) { - perm_scanned_bytes += array_nbytes(a); - } - else { - scanned_bytes += array_nbytes(a); - } - } - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; if (flags.how == 3) { jl_value_t *owner = jl_array_data_owner(a); refyoung |= gc_push_root(ptls, owner, d); @@ -1467,7 +1452,7 @@ static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) gc_setmark_buf(ptls, (char*)a->data - a->offset*a->elsize, bits, array_nbytes(a)); } - if (flags.ptrarray && a->data!=NULL) { + if (flags.ptrarray && a->data != NULL) { size_t l = jl_array_len(a); if (l > 100000 && d > MAX_MARK_DEPTH-10) { // don't mark long arrays at high depth, to try to avoid @@ -1491,36 +1476,17 @@ static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) } else if (vt == jl_module_type) { // should increase nptr here - bits = gc_setmark(ptls, v, sizeof(jl_module_t), tag) & 0xff; - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; refyoung |= gc_mark_module(ptls, (jl_module_t*)v, d, bits); } else if (vt == jl_task_type) { // ditto nptr - bits = gc_setmark(ptls, v, sizeof(jl_task_t), tag) & 0xff; - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; gc_mark_task(ptls, (jl_task_t*)v, d, bits); // tasks should always be remarked since we do not trigger the write barrier // for stores to stack slots refyoung = 1; } - else if (vt == jl_symbol_type) { - // symbols have their own allocator and are never freed - bits = GC_OLD_MARKED; - } else { - size_t dtsz = jl_datatype_size(vt); - bits = gc_setmark(ptls, v, dtsz, tag) & 0xff; - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; - int nf = (int)jl_datatype_nfields(vt); - // TODO check if there is a perf improvement for objects with a 
lot of fields - // int fdsz = sizeof(void*)*nf; - // void** children = alloca(fdsz); - // int ci = 0; for(int i=0; i < nf; i++) { if (jl_field_isptr(vt, i)) { nptr++; @@ -1529,30 +1495,84 @@ static int gc_scan_obj(jl_ptls_t ptls, jl_value_t *v, int d, uintptr_t tag) jl_value_t *fld = *slot; if (fld) { verify_parent2("object", v, slot, "field(%d)", i); - //children[ci++] = fld; refyoung |= gc_push_root(ptls, fld, d); } } } - //while(ci) - // refyoung |= gc_push_root(ptls, children[--ci], d); } ret: - if (gc_verifying) - return bits; - if ((bits == GC_OLD_MARKED) && refyoung) { + if ((bits == GC_OLD_MARKED) && refyoung && !gc_verifying) { ptls->heap.remset_nptr += nptr; // v is an old object referencing young objects arraylist_push(ptls->heap.remset, v); } - return bits; + return; queue_the_root: if (mark_sp >= mark_stack_size) grow_mark_stack(); mark_stack[mark_sp++] = (jl_value_t*)v; - return bits; +} + +// Mark an object (without scanning it) +// The top `int8_t` of the return value is set to `1` if the object was not +// marked before (another thread might have marked it). The bottom `int8_t` +// of the return value is the new GC bits. +static uint16_t gc_mark_obj(jl_ptls_t ptls, jl_value_t *v, uintptr_t tag) +{ + assert(v != NULL); + assert(!gc_marked(tag)); + jl_datatype_t *vt = (jl_datatype_t*)(tag & ~(uintptr_t)15); + gc_assert_datatype(vt); + // Do not initialize `mark_res` to catch branches forgetting to set `mark_res` + // using compiler warnings. + uint16_t mark_res; + + // some values have special representations + if (vt == jl_simplevector_type) { + size_t l = jl_svec_len(v); + mark_res = gc_setmark(ptls, v, l * sizeof(void*) + sizeof(jl_svec_t), + tag); + } + else if (vt->name == jl_array_typename) { + jl_array_t *a = (jl_array_t*)v; + jl_taggedvalue_t *o = jl_astaggedvalue(v); + jl_array_flags_t flags = a->flags; + mark_res = (flags.pooled ? 
gc_setmark_pool(ptls, o, GC_MARKED, tag) : + gc_setmark_big(ptls, o, GC_MARKED, tag)); + if (flags.how == 2 && (mark_res >> 8)) { + uint8_t bits = mark_res & 0xff; + objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, + array_nbytes(a)); + if (bits == GC_OLD_MARKED) { + perm_scanned_bytes += array_nbytes(a); + } + else { + scanned_bytes += array_nbytes(a); + } + } + } + else if (vt == jl_module_type) { + mark_res = gc_setmark(ptls, v, sizeof(jl_module_t), tag); + } + else if (vt == jl_task_type) { + mark_res = gc_setmark(ptls, v, sizeof(jl_task_t), tag); + } + else if (vt == jl_symbol_type) { + // symbols have their own allocator and are never freed + mark_res = GC_OLD_MARKED; + } + else if (vt == jl_string_type) { + mark_res = gc_setmark(ptls, v, jl_string_len(v) + sizeof(size_t) + 1, + tag); + } + else { + mark_res = gc_setmark(ptls, v, jl_datatype_size(vt), tag); + } + if (gc_verifying) + return mark_res | (1 << 8); + return mark_res; } void visit_mark_stack(jl_ptls_t ptls)