diff --git a/base/inference.jl b/base/inference.jl index 814d7782d75557..43d358df50d6e7 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -961,7 +961,7 @@ function abstract_eval(e::ANY, vtypes, sv::StaticVarInfo) t = abstract_eval_call(e, vtypes, sv) elseif is(e.head,:null) t = Void - elseif is(e.head,:new) + elseif is(e.head,:new) || is(e.head,:stknew) t = abstract_eval(e.args[1], vtypes, sv) if isType(t) t = t.parameters[1] diff --git a/src/alloc.c b/src/alloc.c index dd431d0e04a01f..ef9571d1f7b922 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -99,7 +99,7 @@ jl_sym_t *compositetype_sym; jl_sym_t *type_goto_sym; jl_sym_t *global_sym; jl_sym_t *tuple_sym; jl_sym_t *dot_sym; jl_sym_t *newvar_sym; jl_sym_t *boundscheck_sym; jl_sym_t *copyast_sym; -jl_sym_t *fastmath_sym; +jl_sym_t *fastmath_sym; jl_sym_t *stknew_sym; jl_sym_t *simdloop_sym; jl_sym_t *meta_sym; jl_sym_t *arrow_sym; jl_sym_t *inert_sym; jl_sym_t *vararg_sym; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 070e0c0c80fdba..8a05c922192f3b 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1790,7 +1790,7 @@ static void emit_write_barrier(jl_codectx_t* ctx, Value *parent, Value *ptr) { Value* parenttag = builder.CreateBitCast(emit_typeptr_addr(parent), T_psize); Value* parent_type = builder.CreateLoad(parenttag); - Value* parent_mark_bits = builder.CreateAnd(parent_type, 1); + Value* parent_mark_bits = builder.CreateAnd(parent_type, 3); // the branch hint does not seem to make it to the generated code //builder.CreateCall(expect_func, {parent_marked, ConstantInt::get(T_int1, 0)}); @@ -1857,7 +1857,7 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id } } -static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **args, jl_codectx_t *ctx) +static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **args, jl_codectx_t *ctx, bool on_stack = false) { assert(jl_is_datatype(ty)); assert(jl_is_leaf_type(ty)); @@ -1913,10 +1913,18 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg if (might_need_root(args[1]) || !fval_info.isboxed) make_gcroot(f1, ctx); } - Value *strct = emit_allocobj(sty->size); + Value *strct; + Value *tag = literal_pointer_val((jl_value_t*)ty); + if (on_stack) { + strct = builder.CreateConstGEP1_32(builder.CreateBitCast(builder.CreateAlloca(T_int8, ConstantInt::get(T_size, sty->size + sizeof(void*))), jl_pvalue_llvmt), 1); + // set the gc bits as marked & young + tag = builder.CreateIntToPtr(builder.CreateOr(builder.CreatePtrToInt(tag, T_size), 3), jl_pvalue_llvmt); + } + else { + strct = emit_allocobj(sty->size); + } jl_cgval_t strctinfo = mark_julia_type(strct, ty); - builder.CreateStore(literal_pointer_val((jl_value_t*)ty), - emit_typeptr_addr(strct)); + builder.CreateStore(tag, emit_typeptr_addr(strct)); if (f1) { jl_cgval_t f1info = mark_julia_type(f1, jl_any_type); if (!jl_subtype(expr_type(args[1],ctx), jl_field_type(sty,0), 0)) diff --git a/src/codegen.cpp b/src/codegen.cpp index e20d399ce4402e..8e589218969bb4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3443,13 +3443,13 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, b jl_add_linfo_root(ctx->linfo, extype); return mark_julia_const(extype); } - else if (head == new_sym) { + else if (head == new_sym || head == stknew_sym) { jl_value_t *ty = expr_type(args[0], ctx); size_t nargs = jl_array_len(ex->args); if (jl_is_type_type(ty) && jl_is_datatype(jl_tparam0(ty)) && jl_is_leaf_type(jl_tparam0(ty))) { - return emit_new_struct(jl_tparam0(ty),nargs,args,ctx); + return emit_new_struct(jl_tparam0(ty),nargs,args,ctx, head == stknew_sym); } Value *typ = boxed(emit_expr(args[0], ctx), ctx); Value *val = emit_jlcall(jlnew_func, typ, &args[1], nargs-1, ctx); diff --git a/src/gc.c b/src/gc.c index 5dd527fa4daec6..473dc64cd86bd9 100644 --- a/src/gc.c +++ b/src/gc.c @@ -516,8 +516,9 @@ static inline void objprofile_count(void* ty, int old, int sz) #define inc_sat(v,s) v = (v) >= s ? s : (v)+1 -static inline int gc_setmark_big(void *o, int mark_mode) +static inline int gc_setmark_big(void *o, int bits) { + int mark_mode = GC_MARKED_NOESC; #ifdef GC_VERIFY if (verifying) { _gc_setmark(o, mark_mode); @@ -525,21 +526,20 @@ static inline int gc_setmark_big(void *o, int mark_mode) } #endif bigval_t* hdr = bigval_header(o); - int bits = gc_bits(o); - if (bits == GC_QUEUED || bits == GC_MARKED) - mark_mode = GC_MARKED; - if ((mark_mode == GC_MARKED) & (bits != GC_MARKED)) { - // Move hdr from big_objects list to big_objects_marked list - *hdr->prev = hdr->next; - if (hdr->next) - hdr->next->prev = hdr->prev; - hdr->next = big_objects_marked; - hdr->prev = &big_objects_marked; - if (big_objects_marked) - big_objects_marked->prev = &hdr->next; - big_objects_marked = hdr; - } if (!(bits & GC_MARKED)) { + if (bits == GC_QUEUED) + mark_mode = GC_MARKED; + if ((mark_mode == GC_MARKED) & (bits != GC_MARKED)) { + // Move hdr from big_objects list to big_objects_marked list + *hdr->prev = hdr->next; + if (hdr->next) + hdr->next->prev = hdr->prev; + hdr->next = big_objects_marked; + hdr->prev = &big_objects_marked; + if (big_objects_marked) + big_objects_marked->prev = &hdr->next; + big_objects_marked = hdr; + } if (mark_mode == GC_MARKED) perm_scanned_bytes += hdr->sz&~3; else @@ -547,26 +547,25 @@ static inline int gc_setmark_big(void *o, int mark_mode) #ifdef OBJPROFILE objprofile_count(jl_typeof(o), mark_mode == GC_MARKED, hdr->sz&~3); #endif + _gc_setmark(o, mark_mode); } - _gc_setmark(o, mark_mode); verify_val(jl_valueof(o)); return mark_mode; } -static inline int gc_setmark_pool(void *o, int mark_mode) +static inline int gc_setmark_pool(void *o, int bits) { + int mark_mode = GC_MARKED_NOESC; #ifdef GC_VERIFY if (verifying) { _gc_setmark(o, mark_mode); return mark_mode; } #endif - gcpage_t* page = page_metadata(o); - int bits = gc_bits(o); - if (bits == GC_QUEUED || bits == GC_MARKED) { - mark_mode = GC_MARKED; - } if (!(bits & GC_MARKED)) { + if (bits == GC_QUEUED) + mark_mode = GC_MARKED; + gcpage_t* page = page_metadata(o); if (mark_mode == GC_MARKED) perm_scanned_bytes += page->osize; else @@ -574,9 +573,9 @@ static inline int gc_setmark_pool(void *o, int mark_mode) #ifdef OBJPROFILE objprofile_count(jl_typeof(o), mark_mode == GC_MARKED, page->osize); #endif + _gc_setmark(o, mark_mode); + page->gc_bits |= mark_mode; } - _gc_setmark(o, mark_mode); - page->gc_bits |= mark_mode; verify_val(jl_valueof(o)); return mark_mode; } @@ -601,14 +600,19 @@ static inline int gc_setmark(jl_value_t *v, int sz, int mark_mode) inline void gc_setmark_buf(void *o, int mark_mode) { buff_t *buf = gc_val_buf(o); + int bits = gc_bits(buf); + if (mark_mode == GC_MARKED && bits != GC_MARKED) { + _gc_setmark(buf, GC_QUEUED); + bits = GC_QUEUED; + } #ifdef MEMDEBUG - gc_setmark_big(buf, mark_mode); + gc_setmark_big(buf, bits); return; #endif if (buf->pooled) - gc_setmark_pool(buf, mark_mode); + gc_setmark_pool(buf, bits); else - gc_setmark_big(buf, mark_mode); + gc_setmark_big(buf, bits); } static NOINLINE void *malloc_page(void) @@ -1450,16 +1454,20 @@ void jl_gc_setmark(jl_value_t *v) // TODO rename this as it is misleading now if (!gc_marked(o)) { // objprofile_count(jl_typeof(v), 1, 16); #ifdef MEMDEBUG - gc_setmark_big(o, GC_MARKED_NOESC); + gc_setmark_big(o, gc_bits(o)); #else - gc_setmark_pool(o, GC_MARKED_NOESC); + gc_setmark_pool(o, gc_bits(o)); #endif } // perm_scanned_bytes = s; } - +static char* gc_stack_top; +static char* gc_stack_bot; static void gc_mark_stack(jl_value_t* ta, jl_gcframe_t *s, ptrint_t offset, int d) { + jl_task_t *task = (jl_task_t*)ta; + char *stkbuf = task == jl_current_task ? gc_stack_top : task->stkbuf; + char *stkend = task == jl_current_task ? gc_stack_bot : stkbuf + task->ssize; while (s != NULL) { s = (jl_gcframe_t*)((char*)s + offset); jl_value_t ***rts = (jl_value_t***)(((void**)s)+2); @@ -1473,9 +1481,17 @@ static void gc_mark_stack(jl_value_t* ta, jl_gcframe_t *s, ptrint_t offset, int } else { for(size_t i=0; i < nr; i++) { - if (rts[i] != NULL) { + void *v = rts[i]; + if (v != NULL) { verify_parent2("task", ta, &rts[i], "stack(%d)", (int)i); - gc_push_root(rts[i], d); + if (stkbuf <= (char*)v && (char*)v <= stkend) { + // if v is on the stack it is kept permanently marked + // but we still need to scan it once + push_root(v, d, gc_bits(jl_astaggedvalue(v))); + } + else { + gc_push_root(v, d); + } } } } @@ -1576,12 +1592,12 @@ static int push_root(jl_value_t *v, int d, int bits) int refyoung = 0; if (vt == (jl_value_t*)jl_weakref_type) { - bits = gc_setmark(v, sizeof(jl_weakref_t), GC_MARKED_NOESC); + bits = gc_setmark(v, sizeof(jl_weakref_t), bits); goto ret; } if ((jl_is_datatype(vt) && ((jl_datatype_t*)vt)->pointerfree)) { int sz = jl_datatype_size(vt); - bits = gc_setmark(v, sz, GC_MARKED_NOESC); + bits = gc_setmark(v, sz, bits); goto ret; } #define MARK(v, s) do { \ @@ -1597,7 +1613,7 @@ static int push_root(jl_value_t *v, int d, int bits) // some values have special representations if (vt == (jl_value_t*)jl_simplevector_type) { size_t l = jl_svec_len(v); - MARK(v, bits = gc_setmark(v, l*sizeof(void*) + sizeof(jl_svec_t), GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, l*sizeof(void*) + sizeof(jl_svec_t), bits)); jl_value_t **data = ((jl_svec_t*)v)->data; for(size_t i=0; i < l; i++) { jl_value_t *elt = data[i]; @@ -1618,7 +1634,7 @@ static int push_root(jl_value_t *v, int d, int bits) #define _gc_setmark_pool gc_setmark_pool #endif MARK(a, - bits = _gc_setmark_pool(o, GC_MARKED_NOESC); + bits = _gc_setmark_pool(o, bits); if (a->how == 2 && todo) { objprofile_count(MATY, gc_bits(o) == GC_MARKED, array_nbytes(a)); if (gc_bits(o) == GC_MARKED) @@ -1628,7 +1644,7 @@ static int push_root(jl_value_t *v, int d, int bits) }); else MARK(a, - bits = gc_setmark_big(o, GC_MARKED_NOESC); + bits = gc_setmark_big(o, bits); if (a->how == 2 && todo) { objprofile_count(MATY, gc_bits(o) == GC_MARKED, array_nbytes(a)); if (gc_bits(o) == GC_MARKED) @@ -1670,11 +1686,11 @@ static int push_root(jl_value_t *v, int d, int bits) } } else if (vt == (jl_value_t*)jl_module_type) { - MARK(v, bits = gc_setmark(v, sizeof(jl_module_t), GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, sizeof(jl_module_t), bits)); refyoung |= gc_mark_module((jl_module_t*)v, d); } else if (vt == (jl_value_t*)jl_task_type) { - MARK(v, bits = gc_setmark(v, sizeof(jl_task_t), GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, sizeof(jl_task_t), bits)); gc_mark_task((jl_task_t*)v, d); // tasks should always be remarked since we do not trigger the write barrier // for stores to stack slots @@ -1697,7 +1713,7 @@ static int push_root(jl_value_t *v, int d, int bits) dtsz = NWORDS(sizeof(jl_datatype_t) + jl_datatype_nfields(v)*sizeof(jl_fielddesc_t))*sizeof(void*); else dtsz = jl_datatype_size(dt); - MARK(v, bits = gc_setmark(v, dtsz, GC_MARKED_NOESC)); + MARK(v, bits = gc_setmark(v, dtsz, bits)); int nf = (int)jl_datatype_nfields(dt); // TODO check if there is a perf improvement for objects with a lot of fields // int fdsz = sizeof(void*)*nf; @@ -1956,6 +1972,7 @@ void jl_gc_collect(int full) JL_SIGATOMIC_BEGIN(); jl_in_gc = 1; uint64_t t0 = jl_hrtime(); + gc_stack_top = (char*)&t0; int recollect = 0; #if defined(GC_TIME) int wb_activations = mark_sp - saved_mark_sp; @@ -2366,6 +2383,8 @@ void jl_gc_init(void) if (maxmem > max_collect_interval) max_collect_interval = maxmem; #endif + char _dummy; + gc_stack_bot = &_dummy; } // GC summary stats diff --git a/src/interpreter.c b/src/interpreter.c index 3e5c09aae245b7..b739e1efe3c1d4 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -244,7 +244,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl, size_t ng JL_GC_POP(); return rhs; } - else if (ex->head == new_sym) { + else if (ex->head == new_sym || ex->head == stknew_sym) { jl_value_t *thetype = eval(args[0], locals, nl, ngensym); jl_value_t *v=NULL; JL_GC_PUSH2(&thetype, &v); diff --git a/src/jltypes.c b/src/jltypes.c index 9e15ad69c6a7c3..8f556fd39a4663 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3534,6 +3534,7 @@ void jl_init_types(void) dot_sym = jl_symbol("."); boundscheck_sym = jl_symbol("boundscheck"); fastmath_sym = jl_symbol("fastmath"); + stknew_sym = jl_symbol("stknew"); newvar_sym = jl_symbol("newvar"); copyast_sym = jl_symbol("copyast"); simdloop_sym = jl_symbol("simdloop"); diff --git a/src/julia.h b/src/julia.h index 7b0281f7681705..65ea1631a461bc 100644 --- a/src/julia.h +++ b/src/julia.h @@ -486,7 +486,7 @@ extern jl_sym_t *abstracttype_sym; extern jl_sym_t *bitstype_sym; extern jl_sym_t *compositetype_sym; extern jl_sym_t *type_goto_sym; extern jl_sym_t *global_sym; extern jl_sym_t *tuple_sym; extern jl_sym_t *boundscheck_sym; extern jl_sym_t *copyast_sym; -extern jl_sym_t *fastmath_sym; +extern jl_sym_t *fastmath_sym; extern jl_sym_t *stknew_sym; extern jl_sym_t *simdloop_sym; extern jl_sym_t *meta_sym; extern jl_sym_t *arrow_sym; extern jl_sym_t *inert_sym; @@ -580,13 +580,13 @@ void gc_setmark_buf(void *buf, int); static inline void jl_gc_wb_binding(jl_binding_t *bnd, void *val) // val isa jl_value_t* { - if (__unlikely((*((uintptr_t*)bnd-1) & 1) == 1 && (*(uintptr_t*)jl_astaggedvalue(val) & 1) == 0)) + if (__unlikely((*((uintptr_t*)bnd-1) & 3) == 1 && (*(uintptr_t*)jl_astaggedvalue(val) & 1) == 0)) gc_queue_binding(bnd); } static inline void jl_gc_wb(void *parent, void *ptr) // parent and ptr isa jl_value_t* { - if (__unlikely((*((uintptr_t*)jl_astaggedvalue(parent)) & 1) == 1 && + if (__unlikely((*((uintptr_t*)jl_astaggedvalue(parent)) & 3) == 1 && (*((uintptr_t*)jl_astaggedvalue(ptr)) & 1) == 0)) jl_gc_queue_root((jl_value_t*)parent); } @@ -602,7 +602,7 @@ static inline void jl_gc_wb_buf(void *parent, void *bufptr) // parent isa jl_val static inline void jl_gc_wb_back(void *ptr) // ptr isa jl_value_t* { // if ptr is marked - if(__unlikely((*((uintptr_t*)jl_astaggedvalue(ptr)) & 1) == 1)) { + if(__unlikely((*((uintptr_t*)jl_astaggedvalue(ptr)) & 3) == 1)) { jl_gc_queue_root((jl_value_t*)ptr); } }