From d189cb3a7e28aba56ec3a43a9c5d4dc593587a3d Mon Sep 17 00:00:00 2001 From: Oscar Blumberg Date: Sat, 18 Jul 2015 04:19:16 -0400 Subject: [PATCH] GC support for stack allocated objects Only works with copy_stacks for now. The code is not used anywhere in codegen right now; it can be tested by using (:stknew typ args...) instead of :new. It is essentially the GC side of #8134 but without the global alloca stack & unmarking. Stack objects are kept marked (+young) permanently and are only scanned when coming from the owning task. --- base/inference.jl | 2 +- src/alloc.c | 2 +- src/builtins.c | 1 + src/cgutils.cpp | 25 +++++--- src/codegen.cpp | 4 +- src/gc.c | 141 ++++++++++++++++++++++++------------------- src/interpreter.c | 2 +- src/jltypes.c | 7 +++ src/julia.h | 8 ++- src/julia_internal.h | 4 +- 10 files changed, 117 insertions(+), 79 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index 75efb7b0872d9..d878bc6d803f8 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -1044,7 +1044,7 @@ function abstract_eval(e::ANY, vtypes, sv::VarInfo) t = abstract_eval_call(e, vtypes, sv) elseif is(e.head,:null) t = Void - elseif is(e.head,:new) + elseif is(e.head,:new) || is(e.head,:stknew) t = abstract_eval(e.args[1], vtypes, sv) if isType(t) t = t.parameters[1] diff --git a/src/alloc.c b/src/alloc.c index 6b9323a5e9ecd..69ebce3e634b5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -99,7 +99,7 @@ jl_sym_t *copyast_sym; jl_sym_t *fastmath_sym; jl_sym_t *pure_sym; jl_sym_t *simdloop_sym; jl_sym_t *meta_sym; jl_sym_t *inert_sym; jl_sym_t *vararg_sym; -jl_sym_t *unused_sym; +jl_sym_t *unused_sym; jl_sym_t *stknew_sym; typedef struct { int64_t a; diff --git a/src/builtins.c b/src/builtins.c index b4046aa892940..b41f00da61f39 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1185,6 +1185,7 @@ void jl_init_primitives(void) #endif add_builtin("ANY", jl_ANY_flag); + add_builtin("Carnavalue", jl_carnavalue); } // toys for debugging 
--------------------------------------------------------- diff --git a/src/cgutils.cpp b/src/cgutils.cpp index bc6e34e16dcfa..3bc029997078d 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1258,6 +1258,8 @@ static void emit_typecheck(const jl_cgval_t &x, jl_value_t *type, const std::str jl_codectx_t *ctx) { Value *istype; + if (jl_subtype(x.typ, type, 0)) + return; if (jl_is_type_type(type) || !jl_is_leaf_type(type)) { Value *vx = boxed(x, ctx); istype = builder. @@ -2113,9 +2115,9 @@ static Value *emit_allocobj(size_t static_size) // if ptr is NULL this emits a write barrier _back_ static void emit_write_barrier(jl_codectx_t *ctx, Value *parent, Value *ptr) { - Value *parenttag = builder.CreateBitCast(emit_typeptr_addr(parent), T_psize); - Value *parent_type = builder.CreateLoad(parenttag); - Value *parent_mark_bits = builder.CreateAnd(parent_type, 1); + Value* parenttag = builder.CreateBitCast(emit_typeptr_addr(parent), T_psize); + Value* parent_type = builder.CreateLoad(parenttag); + Value* parent_mark_bits = builder.CreateAnd(parent_type, 3); // the branch hint does not seem to make it to the generated code //builder.CreateCall(expect_func, {parent_marked, ConstantInt::get(T_int1, 0)}); @@ -2188,7 +2190,7 @@ static bool might_need_root(jl_value_t *ex) !jl_is_globalref(ex)); } -static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **args, jl_codectx_t *ctx) +static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **args, jl_codectx_t *ctx, bool on_stack = false) { assert(jl_is_datatype(ty)); assert(jl_is_leaf_type(ty)); @@ -2231,10 +2233,18 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg f1 = boxed(fval_info, ctx); j++; } - Value *strct = emit_allocobj(sty->size); + Value *strct; + Value *tag = literal_pointer_val((jl_value_t*)ty); + if (on_stack) { + strct = builder.CreateConstGEP1_32(builder.CreateBitCast(emit_static_alloca(T_int8, sty->size + sizeof(void*), ctx), T_pjlvalue), 
1); + // set the gc bits as marked & young + tag = builder.CreateIntToPtr(builder.CreateOr(builder.CreatePtrToInt(tag, T_size), 3), T_pjlvalue); + } + else { + strct = emit_allocobj(sty->size); + } jl_cgval_t strctinfo = mark_julia_type(strct, true, ty, ctx); - builder.CreateStore(literal_pointer_val((jl_value_t*)ty), - emit_typeptr_addr(strct)); + builder.CreateStore(tag, emit_typeptr_addr(strct)); if (f1) { jl_cgval_t f1info = mark_julia_type(f1, true, jl_any_type, ctx); if (!jl_subtype(expr_type(args[1],ctx), jl_field_type(sty,0), 0)) @@ -2259,7 +2269,6 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg need_wb = true; } if (rhs.isboxed) { - if (!jl_subtype(expr_type(args[i],ctx), jl_svecref(sty->types,i-1), 0)) emit_typecheck(rhs, jl_svecref(sty->types,i-1), "new", ctx); } if (might_need_root(args[i])) // TODO: how to remove this? diff --git a/src/codegen.cpp b/src/codegen.cpp index 9f80d2c5cb39a..e769e530eee34 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3492,14 +3492,14 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) jl_add_linfo_root(ctx->linfo, extype); return mark_julia_const(extype); } - else if (head == new_sym) { + else if (head == new_sym || head == stknew_sym) { jl_value_t *ty = expr_type(args[0], ctx); size_t nargs = jl_array_len(ex->args); if (jl_is_type_type(ty) && jl_is_datatype(jl_tparam0(ty)) && jl_is_leaf_type(jl_tparam0(ty))) { assert(nargs <= jl_datatype_nfields(jl_tparam0(ty))+1); - return emit_new_struct(jl_tparam0(ty),nargs,args,ctx); + return emit_new_struct(jl_tparam0(ty),nargs,args,ctx, head == stknew_sym); } Value *typ = boxed(emit_expr(args[0], ctx), ctx); Value *val = emit_jlcall(jlnew_func, typ, &args[1], nargs-1, ctx); diff --git a/src/gc.c b/src/gc.c index 4d8762674c6f7..75f51b4898880 100644 --- a/src/gc.c +++ b/src/gc.c @@ -738,8 +738,9 @@ static inline void objprofile_count(void *ty, int old, int sz) #define inc_sat(v,s) v = (v) >= s ? 
s : (v)+1 -static inline int gc_setmark_big(void *o, int mark_mode) +static inline int gc_setmark_big(void *o, int bits) { + int mark_mode = GC_MARKED_NOESC; #ifdef GC_VERIFY if (verifying) { _gc_setmark(o, mark_mode); @@ -747,57 +748,55 @@ static inline int gc_setmark_big(void *o, int mark_mode) } #endif assert(find_region(o,1) == NULL); - bigval_t *hdr = bigval_header(o); - int bits = gc_bits(o); + bigval_t* hdr = bigval_header(o); if (bits == GC_QUEUED || bits == GC_MARKED) mark_mode = GC_MARKED; - if ((mark_mode == GC_MARKED) & (bits != GC_MARKED)) { - // Move hdr from big_objects list to big_objects_marked list - *hdr->prev = hdr->next; - if (hdr->next) - hdr->next->prev = hdr->prev; - hdr->next = big_objects_marked; - hdr->prev = &big_objects_marked; - if (big_objects_marked) - big_objects_marked->prev = &hdr->next; - big_objects_marked = hdr; - } if (!(bits & GC_MARKED)) { + if ((mark_mode == GC_MARKED) & (bits != GC_MARKED)) { + // Move hdr from big_objects list to big_objects_marked list + *hdr->prev = hdr->next; + if (hdr->next) + hdr->next->prev = hdr->prev; + hdr->next = big_objects_marked; + hdr->prev = &big_objects_marked; + if (big_objects_marked) + big_objects_marked->prev = &hdr->next; + big_objects_marked = hdr; + } if (mark_mode == GC_MARKED) perm_scanned_bytes += hdr->sz&~3; else scanned_bytes += hdr->sz&~3; objprofile_count(jl_typeof(jl_valueof(o)), mark_mode == GC_MARKED, hdr->sz&~3); + _gc_setmark(o, mark_mode); } - _gc_setmark(o, mark_mode); verify_val(jl_valueof(o)); return mark_mode; } -static inline int gc_setmark_pool(void *o, int mark_mode) +static inline int gc_setmark_pool(void *o, int bits) { + int mark_mode = GC_MARKED_NOESC; #ifdef GC_VERIFY if (verifying) { _gc_setmark(o, mark_mode); return mark_mode; } #endif - gcpage_t *page = page_metadata(o); - int bits = gc_bits(o); - if (bits == GC_QUEUED || bits == GC_MARKED) { + if (bits == GC_QUEUED || bits == GC_MARKED) mark_mode = GC_MARKED; - } if (!(bits & GC_MARKED)) { + gcpage_t* 
page = page_metadata(o); if (mark_mode == GC_MARKED) perm_scanned_bytes += page->osize; else scanned_bytes += page->osize; objprofile_count(jl_typeof(jl_valueof(o)), mark_mode == GC_MARKED, page->osize); + _gc_setmark(o, mark_mode); + page->gc_bits |= mark_mode; } - _gc_setmark(o, mark_mode); - page->gc_bits |= mark_mode; verify_val(jl_valueof(o)); return mark_mode; } @@ -822,14 +821,19 @@ static inline int gc_setmark(jl_value_t *v, int sz, int mark_mode) inline void gc_setmark_buf(void *o, int mark_mode) { buff_t *buf = gc_val_buf(o); + int bits = gc_bits(buf); + if (mark_mode == GC_MARKED && bits != GC_MARKED) { + _gc_setmark(buf, GC_QUEUED); + bits = GC_QUEUED; + } #ifdef MEMDEBUG - gc_setmark_big(buf, mark_mode); + gc_setmark_big(buf, bits); return; #endif if (buf->pooled) - gc_setmark_pool(buf, mark_mode); + gc_setmark_pool(buf, bits); else - gc_setmark_big(buf, mark_mode); + gc_setmark_big(buf, bits); } static NOINLINE void *malloc_page(void) @@ -1671,14 +1675,52 @@ void jl_gc_setmark(jl_value_t *v) // TODO rename this as it is misleading now jl_taggedvalue_t *o = jl_astaggedvalue(v); if (!gc_marked(o)) { #ifdef MEMDEBUG - gc_setmark_big(o, GC_MARKED_NOESC); + gc_setmark_big(o, gc_bits(o)); #else - gc_setmark_pool(o, GC_MARKED_NOESC); + gc_setmark_pool(o, gc_bits(o)); #endif } // perm_scanned_bytes = s; } +static char* gc_stack_top; +static char* gc_stack_bot; +static void gc_mark_stack(jl_value_t* ta, jl_gcframe_t *s, intptr_t offset, int d) +{ + jl_task_t *task = (jl_task_t*)ta; + char *stkbuf = task == jl_current_task ? gc_stack_top : task->stkbuf; + char *stkend = task == jl_current_task ? 
gc_stack_bot : stkbuf + task->ssize; + while (s != NULL) { + s = (jl_gcframe_t*)((char*)s + offset); + jl_value_t ***rts = (jl_value_t***)(((void**)s)+2); + size_t nr = s->nroots>>1; + if (s->nroots & 1) { + for(size_t i=0; i < nr; i++) { + jl_value_t **ptr = (jl_value_t**)((char*)rts[i] + offset); + if (*ptr != NULL) + gc_push_root(*ptr, d); + } + } + else { + for(size_t i=0; i < nr; i++) { + void *v = rts[i]; + if (v != NULL) { + verify_parent2("task", ta, &rts[i], "stack(%d)", (int)i); + if (stkbuf <= (char*)v && (char*)v <= stkend) { + // if v is on the stack it is kept permanently marked + // but we still need to scan it once + push_root(v, d, gc_bits(jl_astaggedvalue(v))); + } + else { + gc_push_root(v, d); + } + } + } + } + s = s->prev; + } +} + NOINLINE static int gc_mark_module(jl_module_t *m, int d) { size_t i; @@ -1716,31 +1758,6 @@ NOINLINE static int gc_mark_module(jl_module_t *m, int d) return refyoung; } -static void gc_mark_stack(jl_value_t *ta, jl_gcframe_t *s, intptr_t offset, int d) -{ - while (s != NULL) { - s = (jl_gcframe_t*)((char*)s + offset); - jl_value_t ***rts = (jl_value_t***)(((void**)s)+2); - size_t nr = s->nroots>>1; - if (s->nroots & 1) { - for(size_t i=0; i < nr; i++) { - jl_value_t **ptr = (jl_value_t**)((char*)rts[i] + offset); - if (*ptr != NULL) - gc_push_root(*ptr, d); - } - } - else { - for(size_t i=0; i < nr; i++) { - if (rts[i] != NULL) { - verify_parent2("task", ta, &rts[i], "stack(%d)", (int)i); - gc_push_root(rts[i], d); - } - } - } - s = s->prev; - } -} - static void gc_mark_task_stack(jl_task_t *ta, int d) { int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL); @@ -1797,12 +1814,12 @@ static int push_root(jl_value_t *v, int d, int bits) int refyoung = 0, nptr = 0; if (vt == (jl_value_t*)jl_weakref_type) { - bits = gc_setmark(v, sizeof(jl_weakref_t), GC_MARKED_NOESC); + bits = gc_setmark(v, sizeof(jl_weakref_t), bits); goto ret; } if ((jl_is_datatype(vt) && ((jl_datatype_t*)vt)->pointerfree)) { int sz = 
jl_datatype_size(vt); - bits = gc_setmark(v, sz, GC_MARKED_NOESC); + bits = gc_setmark(v, sz, bits); goto ret; } #define MARK(v, s) do { \ @@ -1819,7 +1836,7 @@ static int push_root(jl_value_t *v, int d, int bits) if (vt == (jl_value_t*)jl_simplevector_type) { size_t l = jl_svec_len(v); MARK(v, bits = gc_setmark(v, l * sizeof(void*) + - sizeof(jl_svec_t), GC_MARKED_NOESC)); + sizeof(jl_svec_t), bits)); jl_value_t **data = jl_svec_data(v); nptr += l; for(size_t i=0; i < l; i++) { @@ -1841,7 +1858,7 @@ static int push_root(jl_value_t *v, int d, int bits) #define _gc_setmark_pool gc_setmark_pool #endif MARK(a, - bits = _gc_setmark_pool(o, GC_MARKED_NOESC); + bits = _gc_setmark_pool(o, bits); if (a->flags.how == 2 && todo) { objprofile_count(jl_malloc_tag, gc_bits(o) == GC_MARKED, array_nbytes(a)); if (gc_bits(o) == GC_MARKED) @@ -1851,7 +1868,7 @@ static int push_root(jl_value_t *v, int d, int bits) }); else MARK(a, - bits = gc_setmark_big(o, GC_MARKED_NOESC); + bits = gc_setmark_big(o, bits); if (a->flags.how == 2 && todo) { objprofile_count(jl_malloc_tag, gc_bits(o) == GC_MARKED, array_nbytes(a)); if (gc_bits(o) == GC_MARKED) @@ -1895,12 +1912,12 @@ static int push_root(jl_value_t *v, int d, int bits) } else if (vt == (jl_value_t*)jl_module_type) { // should increase nptr here - MARK(v, bits = gc_setmark(v, sizeof(jl_module_t), GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, sizeof(jl_module_t), bits)); refyoung |= gc_mark_module((jl_module_t*)v, d); } else if (vt == (jl_value_t*)jl_task_type) { // ditto nptr - MARK(v, bits = gc_setmark(v, sizeof(jl_task_t), GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, sizeof(jl_task_t), bits)); gc_mark_task((jl_task_t*)v, d); // tasks should always be remarked since we do not trigger the write barrier // for stores to stack slots @@ -1922,8 +1939,7 @@ static int push_root(jl_value_t *v, int d, int bits) else { dtsz = jl_datatype_size(dt); } - MARK(v, bits = gc_setmark(v, dtsz, GC_MARKED_NOESC)); - + MARK(v, bits = 
gc_setmark(v, dtsz, bits)); int nf = (int)jl_datatype_nfields(dt); // TODO check if there is a perf improvement for objects with a lot of fields // int fdsz = sizeof(void*)*nf; @@ -2186,6 +2202,7 @@ static int sweep_mask = GC_MARKED; static void _jl_gc_collect(int full, char *stack_hi) { uint64_t t0 = jl_hrtime(); + gc_stack_top = (char*)&t0; int recollect = 0; #if defined(GC_TIME) int wb_activations = mark_sp - saved_mark_sp; @@ -2670,6 +2687,8 @@ void jl_gc_init(void) if (maxmem > max_collect_interval) max_collect_interval = maxmem; #endif + char _dummy; + gc_stack_bot = &_dummy; } // GC summary stats diff --git a/src/interpreter.c b/src/interpreter.c index af2dc188af829..1380267ede6b6 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -215,7 +215,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl, size_t ng JL_GC_POP(); return rhs; } - else if (ex->head == new_sym) { + else if (ex->head == new_sym || ex->head == stknew_sym) { jl_value_t *thetype = eval(args[0], locals, nl, ngensym); jl_value_t *v=NULL; JL_GC_PUSH2(&thetype, &v); diff --git a/src/jltypes.c b/src/jltypes.c index 6a06343380248..e0bda324ed812 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -64,6 +64,8 @@ JL_DLLEXPORT jl_value_t *jl_emptytuple=NULL; jl_svec_t *jl_emptysvec; jl_value_t *jl_nothing; +jl_datatype_t *jl_carnavalue; + // --- type properties and predicates --- int jl_is_type(jl_value_t *v) @@ -3535,6 +3537,10 @@ void jl_init_types(void) jl_ANY_flag = (jl_value_t*)tvar("ANY"); + jl_carnavalue = + jl_new_abstracttype((jl_value_t*)jl_symbol("Carnavalue"), + jl_any_type, jl_emptysvec); + // complete builtin type metadata jl_value_t *pointer_void = jl_apply_type((jl_value_t*)jl_pointer_type, jl_svec1(jl_void_type)); @@ -3609,6 +3615,7 @@ void jl_init_types(void) boundscheck_sym = jl_symbol("boundscheck"); inbounds_sym = jl_symbol("inbounds"); fastmath_sym = jl_symbol("fastmath"); + stknew_sym = jl_symbol("stknew"); newvar_sym = jl_symbol("newvar"); copyast_sym = 
jl_symbol("copyast"); simdloop_sym = jl_symbol("simdloop"); diff --git a/src/julia.h b/src/julia.h index cbb8bd73258f3..1918cd6f5f685 100644 --- a/src/julia.h +++ b/src/julia.h @@ -466,6 +466,8 @@ extern JL_DLLEXPORT jl_value_t *jl_true; extern JL_DLLEXPORT jl_value_t *jl_false; extern JL_DLLEXPORT jl_value_t *jl_nothing; +extern JL_DLLEXPORT jl_datatype_t *jl_carnavalue; + // some important symbols extern jl_sym_t *call_sym; extern jl_sym_t *dots_sym; extern jl_sym_t *vararg_sym; @@ -494,7 +496,7 @@ extern jl_sym_t *boundscheck_sym; extern jl_sym_t *inbounds_sym; extern jl_sym_t *copyast_sym; extern jl_sym_t *fastmath_sym; extern jl_sym_t *pure_sym; extern jl_sym_t *simdloop_sym; extern jl_sym_t *meta_sym; extern jl_sym_t *list_sym; -extern jl_sym_t *inert_sym; +extern jl_sym_t *inert_sym; extern jl_sym_t *stknew_sym; // gc ------------------------------------------------------------------------- @@ -569,7 +571,7 @@ JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *root); // root isa jl_value_t* STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) { // parent and ptr isa jl_value_t* - if (__unlikely((jl_astaggedvalue(parent)->gc_bits & 1) == 1 && + if (__unlikely((jl_astaggedvalue(parent)->gc_bits & 3) == 1 && (jl_astaggedvalue(ptr)->gc_bits & 1) == 0)) jl_gc_queue_root((jl_value_t*)parent); } @@ -577,7 +579,7 @@ STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) STATIC_INLINE void jl_gc_wb_back(void *ptr) // ptr isa jl_value_t* { // if ptr is marked - if (__unlikely((jl_astaggedvalue(ptr)->gc_bits & 1) == 1)) { + if (__unlikely((jl_astaggedvalue(ptr)->gc_bits & 3) == 1)) { jl_gc_queue_root((jl_value_t*)ptr); } } diff --git a/src/julia_internal.h b/src/julia_internal.h index 118fefc1958ce..b38c3e2baef8f 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -90,7 +90,7 @@ void gc_setmark_buf(void *buf, int); static inline void jl_gc_wb_binding(jl_binding_t *bnd, void *val) // val isa jl_value_t* { - if (__unlikely((jl_astaggedvalue(bnd)->gc_bits & 1) == 1 
&& + if (__unlikely((jl_astaggedvalue(bnd)->gc_bits & 3) == 1 && (jl_astaggedvalue(val)->gc_bits & 1) == 0)) gc_queue_binding(bnd); } @@ -98,7 +98,7 @@ static inline void jl_gc_wb_binding(jl_binding_t *bnd, void *val) // val isa jl_ static inline void jl_gc_wb_buf(void *parent, void *bufptr) // parent isa jl_value_t* { // if parent is marked and buf is not - if (__unlikely((jl_astaggedvalue(parent)->gc_bits & 1) == 1)) + if (__unlikely((jl_astaggedvalue(parent)->gc_bits & 3) == 1)) // (jl_astaggedvalue(bufptr)->gc_bits) != 1)) gc_setmark_buf(bufptr, jl_astaggedvalue(parent)->gc_bits); }