From b0e692a6418460a4b87c81a6dbbe78cc73ea318e Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 22 Aug 2016 17:14:29 -0400 Subject: [PATCH] incremental deserialize: handle LambdaInfo identity uniquing this works to avoid having `Expr(:invoke)` creating unintentional copies of LambdaInfo objects when they show up in the system image fix #18184 --- doc/manual/modules.rst | 8 +++ src/alloc.c | 9 ++- src/codegen.cpp | 2 +- src/dump.c | 148 +++++++++++++++++++++++++++++++++-------- src/gf.c | 35 +++++----- src/julia_internal.h | 2 +- test/compile.jl | 5 ++ 7 files changed, 160 insertions(+), 49 deletions(-) diff --git a/doc/manual/modules.rst b/doc/manual/modules.rst index 72241c2ea00a7..4947aba6e6bd4 100644 --- a/doc/manual/modules.rst +++ b/doc/manual/modules.rst @@ -422,6 +422,14 @@ A few other points to be aware of: 4. WeakRef objects and finalizers are not currently handled properly by the serializer (this will be fixed in an upcoming release). +5. It is usually best to avoid capturing references to instances of internal metadata objects such as + Method, LambdaInfo, MethodTable, TypeMapLevel, TypeMapEntry + and fields of those objects, as this can confuse the serializer + and may not lead to the outcome you desire. + It is not necessarily an error to do this, + but you simply need to be prepared that the system will + try to copy some of these and to create a single unique instance of others. + It is sometimes helpful during module development to turn off incremental precompilation. The command line flag ``--compilecache={yes|no}`` enables you to toggle module precompilation on and off. When Julia is started with ``--compilecache=no`` the serialized modules in the compile cache are ignored when loading modules and module dependencies. 
diff --git a/src/alloc.c b/src/alloc.c index 79da428f63028..6255e056abb29 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -553,7 +553,7 @@ static jl_lambda_info_t *jl_copy_lambda(jl_lambda_info_t *linfo) } // return a new lambda-info that has some extra static parameters merged in -JL_DLLEXPORT jl_lambda_info_t *jl_get_specialized(jl_method_t *m, jl_tupletype_t *types, jl_svec_t *sp) +JL_DLLEXPORT jl_lambda_info_t *jl_get_specialized(jl_method_t *m, jl_tupletype_t *types, jl_svec_t *sp, int allow_exec) { jl_lambda_info_t *linfo = m->lambda_template; jl_lambda_info_t *new_linfo; @@ -565,6 +565,13 @@ JL_DLLEXPORT jl_lambda_info_t *jl_get_specialized(jl_method_t *m, jl_tupletype_t new_linfo->def = m; new_linfo->sparam_vals = sp; } + else if (!allow_exec) { + new_linfo = jl_copy_lambda(linfo); + new_linfo->specTypes = types; + new_linfo->def = m; + new_linfo->sparam_vals = sp; + jl_set_lambda_code_null(new_linfo); + } else { new_linfo = jl_instantiate_staged(m, types, sp); } diff --git a/src/codegen.cpp b/src/codegen.cpp index 2789963bbb14c..a15626019119c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1176,7 +1176,7 @@ void *jl_get_llvmf(jl_tupletype_t *tt, bool getwrapper, bool getdeclarations) linfo = jl_get_specialization1(tt); if (linfo == NULL) { linfo = jl_method_lookup_by_type( - ((jl_datatype_t*)jl_tparam0(tt))->name->mt, tt, 0, 0); + ((jl_datatype_t*)jl_tparam0(tt))->name->mt, tt, 0, 0, 1); if (linfo == NULL || jl_has_call_ambiguities(tt, linfo->def)) { JL_GC_POP(); return NULL; diff --git a/src/dump.c b/src/dump.c index 58905f0f3136f..6ee8d8980c83b 100644 --- a/src/dump.c +++ b/src/dump.c @@ -854,6 +854,11 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v) writetag(s->s, jl_method_type); jl_method_t *m = (jl_method_t*)v; union jl_typemap_t *tf = &m->specializations; + if (s->mode == MODE_MODULE || s->mode == MODE_MODULE_POSTWORK) { + int external = !module_in_worklist(m->module); + if (external) + jl_error("support for serializing 
a direct reference to an external Method not implemented"); + } if (tf->unknown && tf->unknown != jl_nothing) { // go through the t-func cache, replacing ASTs with just return // types for abstract argument types. these ASTs are generally @@ -879,6 +884,19 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v) else if (jl_is_lambda_info(v)) { writetag(s->s, jl_lambda_info_type); jl_lambda_info_t *li = (jl_lambda_info_t*)v; + jl_serialize_value(s, (jl_value_t*)li->specTypes); + write_int8(s->s, li->inferred); + if (s->mode == MODE_MODULE || s->mode == MODE_MODULE_POSTWORK) { + int external = li->def && !module_in_worklist(li->def->module); + write_uint8(s->s, external); + if (external) { + // also flag this in the backref table as special + uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); + assert(*bp != (uintptr_t)HT_NOTFOUND); + *bp |= 1; assert(((uintptr_t)HT_NOTFOUND)|1); + return; + } + } if (li->jlcall_api == 2) jl_serialize_value(s, jl_nothing); else @@ -890,8 +908,6 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v) jl_serialize_value(s, li->rettype); jl_serialize_value(s, (jl_value_t*)li->sparam_syms); jl_serialize_value(s, (jl_value_t*)li->sparam_vals); - jl_serialize_value(s, (jl_value_t*)li->specTypes); - write_int8(s->s, li->inferred); write_int8(s->s, li->pure); write_int8(s->s, li->inlineable); write_int8(s->s, li->isva); @@ -1389,7 +1405,7 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta isunboxed = !(elsize>>15); elsize = elsize&0x7fff; } - int pos = backref_list.len; + uintptr_t pos = backref_list.len; if (usetable) arraylist_push(&backref_list, NULL); size_t *dims = (size_t*)alloca(ndims*sizeof(size_t)); @@ -1452,6 +1468,7 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta jl_method_t *m = (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), jl_method_type); + memset(m, 0, sizeof(jl_method_t)); /* zero the whole allocation: size of the struct, not of the jl_method_type tag */ if (usetable) 
arraylist_push(&backref_list, m); m->specializations.unknown = jl_deserialize_value(s, (jl_value_t**)&m->specializations); @@ -1490,8 +1507,42 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta jl_lambda_info_t *li = (jl_lambda_info_t*)jl_gc_alloc(ptls, sizeof(jl_lambda_info_t), jl_lambda_info_type); + memset(li, 0, sizeof(jl_lambda_info_t)); + uintptr_t pos = backref_list.len; if (usetable) arraylist_push(&backref_list, li); + + li->specTypes = (jl_tupletype_t*)jl_deserialize_value(s, (jl_value_t**)&li->specTypes); + if (li->specTypes) jl_gc_wb(li, li->specTypes); + int inferred = read_int8(s->s); + li->inferred = inferred; + + if (s->mode == MODE_MODULE) { + int external = read_uint8(s->s); + if (external) { + assert(loc != NULL); + arraylist_push(&flagref_list, loc); + arraylist_push(&flagref_list, (void*)pos); + return (jl_value_t*)li; + } + } + if (s->mode == MODE_MODULE_POSTWORK) { + int external = read_uint8(s->s); + if (external) { + jl_datatype_t *ftype = jl_first_argument_datatype((jl_value_t*)li->specTypes); + jl_methtable_t *mt = ftype->name->mt; + li = jl_method_lookup_by_type(mt, li->specTypes, 1, 0, 0); + assert(li); + backref_list.items[pos] = li; + // if it can be inferred but isn't, encourage codegen to infer it + if (inferred && !li->inferred) { + jl_set_lambda_code_null(li); + li->inferred = 1; + } + return (jl_value_t*)li; + } + } + li->code = jl_deserialize_value(s, &li->code); jl_gc_wb(li, li->code); li->slotnames = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&li->slotnames); jl_gc_wb(li, li->slotnames); li->slottypes = jl_deserialize_value(s, &li->slottypes); jl_gc_wb(li, li->slottypes); @@ -1503,10 +1554,7 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta jl_gc_wb(li, li->sparam_syms); li->sparam_vals = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&li->sparam_vals); jl_gc_wb(li, li->sparam_vals); - li->specTypes = (jl_tupletype_t*)jl_deserialize_value(s, 
(jl_value_t**)&li->specTypes); - if (li->specTypes) jl_gc_wb(li, li->specTypes); li->unspecialized_ducttape = NULL; - li->inferred = read_int8(s->s); li->pure = read_int8(s->s); li->inlineable = read_int8(s->s); li->isva = read_int8(s->s); @@ -1530,7 +1578,7 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta return (jl_value_t*)li; } else if (vtag == (jl_value_t*)jl_module_type) { - int pos = backref_list.len; + uintptr_t pos = backref_list.len; if (usetable) arraylist_push(&backref_list, NULL); jl_sym_t *mname = (jl_sym_t*)jl_deserialize_value(s, NULL); @@ -1620,7 +1668,7 @@ static jl_value_t *jl_deserialize_value_(jl_serializer_state *s, jl_value_t *vta else if (vtag == (jl_value_t*)jl_datatype_type || vtag == (jl_value_t*)SmallDataType_tag) { int32_t sz = (vtag == (jl_value_t*)SmallDataType_tag ? read_uint8(s->s) : read_int32(s->s)); jl_value_t *v = jl_gc_alloc(ptls, sz, NULL); - int pos = backref_list.len; + uintptr_t pos = backref_list.len; if (usetable) arraylist_push(&backref_list, v); jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, &jl_astaggedvalue(v)->type); @@ -2327,30 +2375,72 @@ static void jl_recache_types(void) int offs = (int)(intptr_t)flagref_list.items[i++]; jl_value_t *v, *o = loc ? 
*loc : (jl_value_t*)backref_list.items[offs]; jl_datatype_t *dt, *t; - if (jl_is_datatype(o)) { - dt = (jl_datatype_t*)o; - v = dt->instance; - assert(dt->uid == -1); - t = jl_recache_type(dt, i, NULL); - } - else { - dt = (jl_datatype_t*)jl_typeof(o); + if (jl_is_lambda_info(o)) { + // lookup the real LambdaInfo based on the placeholder specTypes + jl_lambda_info_t *li = (jl_lambda_info_t*)o; + int inferred = li->inferred; + jl_datatype_t *argtypes = jl_recache_type(li->specTypes, i, NULL); + jl_datatype_t *ftype = jl_first_argument_datatype((jl_value_t*)argtypes); + jl_methtable_t *mt = ftype->name->mt; + jl_set_typeof(li, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors + li = jl_method_lookup_by_type(mt, argtypes, 1, 0, 0); + assert(li); + // if it can be inferred but isn't, encourage codegen to infer it + if (inferred && !li->inferred) { + jl_set_lambda_code_null(li); + li->inferred = 1; + } + // update the backref list + if (loc) *loc = (jl_value_t*)li; + if (offs > 0) backref_list.items[offs] = li; v = o; - t = jl_recache_type(dt, i, v); - } - assert(dt); - if (t != dt) { - jl_set_typeof(dt, (void*)(intptr_t)0x10); // invalidate the old value to help catch errors - if ((jl_value_t*)dt == o) { - if (loc) *loc = (jl_value_t*)t; - if (offs > 0) backref_list.items[offs] = t; + size_t j = i; + while (j < flagref_list.len) { + jl_value_t **loc = (jl_value_t**)flagref_list.items[j]; + int offs = (int)(intptr_t)flagref_list.items[j+1]; + jl_value_t *o = loc ? 
*loc : (jl_value_t*)backref_list.items[offs]; + if ((jl_value_t*)v == o) { // same item, update this entry + if (loc) *loc = (jl_value_t*)li; + if (offs > 0) backref_list.items[offs] = li; + // delete this item from the flagref list, so it won't be re-encountered later + flagref_list.len -= 2; + if (j >= flagref_list.len) + break; + flagref_list.items[j+0] = flagref_list.items[flagref_list.len+0]; + flagref_list.items[j+1] = flagref_list.items[flagref_list.len+1]; + } + else { + j += 2; + } } } - if (t->instance != v) { - jl_set_typeof(v, (void*)(intptr_t)0x20); // invalidate the old value to help catch errors - if (v == o) { - *loc = t->instance; - if (offs > 0) backref_list.items[offs] = t->instance; + else { + if (jl_is_datatype(o)) { + dt = (jl_datatype_t*)o; + v = dt->instance; + assert(dt->uid == -1); + t = jl_recache_type(dt, i, NULL); + } + else { + dt = (jl_datatype_t*)jl_typeof(o); + v = o; + assert(dt->instance); + t = jl_recache_type(dt, i, v); + } + assert(dt); + if (t != dt) { + jl_set_typeof(dt, (void*)(intptr_t)0x10); // invalidate the old value to help catch errors + if ((jl_value_t*)dt == o) { + if (loc) *loc = (jl_value_t*)t; + if (offs > 0) backref_list.items[offs] = t; + } + } + if (t->instance != v) { + jl_set_typeof(v, (void*)(intptr_t)0x20); // invalidate the old value to help catch errors + if (v == o) { + *loc = t->instance; + if (offs > 0) backref_list.items[offs] = t->instance; + } } } } diff --git a/src/gf.c b/src/gf.c index 225ecf7569640..0e69c74090e73 100644 --- a/src/gf.c +++ b/src/gf.c @@ -117,7 +117,7 @@ static int8_t jl_cachearg_offset(jl_methtable_t *mt) /// ----- Insertion logic for special entries ----- /// -JL_DLLEXPORT jl_lambda_info_t *jl_get_specialized(jl_method_t *m, jl_tupletype_t *types, jl_svec_t *sp); +JL_DLLEXPORT jl_lambda_info_t *jl_get_specialized(jl_method_t *m, jl_tupletype_t *types, jl_svec_t *sp, int allow_exec); // get or create the LambdaInfo for a specialization JL_DLLEXPORT jl_lambda_info_t 
*jl_specializations_get_linfo(jl_method_t *m, jl_tupletype_t *type, jl_svec_t *sparams) @@ -128,7 +128,7 @@ JL_DLLEXPORT jl_lambda_info_t *jl_specializations_get_linfo(jl_method_t *m, jl_t JL_UNLOCK(&m->writelock); return (jl_lambda_info_t*)sf->func.value; } - jl_lambda_info_t *li = jl_get_specialized(m, type, sparams); + jl_lambda_info_t *li = jl_get_specialized(m, type, sparams, 1); JL_GC_PUSH1(&li); // TODO: fuse lookup and insert steps jl_typemap_insert(&m->specializations, (jl_value_t*)m, type, jl_emptysvec, NULL, jl_emptysvec, (jl_value_t*)li, 0, &tfunc_cache, NULL); @@ -611,7 +611,8 @@ static jl_lambda_info_t *cache_method(jl_methtable_t *mt, union jl_typemap_t *ca jl_tupletype_t *type, // the specialized type signature for type lambda jl_tupletype_t *tt, // the original tupletype of the signature jl_typemap_entry_t *m, - jl_svec_t *sparams) + jl_svec_t *sparams, + int allow_exec) { // caller must hold the mt->writelock jl_method_t *definition = m->func.method; @@ -797,13 +798,13 @@ static jl_lambda_info_t *cache_method(jl_methtable_t *mt, union jl_typemap_t *ca jl_typemap_insert(cache, parent, origtype, jl_emptysvec, type, guardsigs, (jl_value_t*)newmeth, jl_cachearg_offset(mt), &lambda_cache, NULL); - if (definition->traced && jl_method_tracer) + if (definition->traced && jl_method_tracer && allow_exec) jl_call_tracer(jl_method_tracer, (jl_value_t*)newmeth); JL_GC_POP(); return newmeth; } -static jl_lambda_info_t *jl_mt_assoc_by_type(jl_methtable_t *mt, jl_datatype_t *tt, int cache, int inexact) +static jl_lambda_info_t *jl_mt_assoc_by_type(jl_methtable_t *mt, jl_datatype_t *tt, int cache, int inexact, int allow_exec) { // caller must hold the mt->writelock jl_typemap_entry_t *entry = NULL; @@ -826,10 +827,10 @@ static jl_lambda_info_t *jl_mt_assoc_by_type(jl_methtable_t *mt, jl_datatype_t * sig = join_tsig(tt, entry->sig); jl_lambda_info_t *nf; if (!cache) { - nf = jl_get_specialized(m, sig, env); + nf = jl_get_specialized(m, sig, env, allow_exec); } else 
{ - nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, sig, tt, entry, env); + nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, sig, tt, entry, env, allow_exec); } JL_GC_POP(); return nf; @@ -1143,7 +1144,7 @@ jl_tupletype_t *arg_type_tuple(jl_value_t **args, size_t nargs) } jl_lambda_info_t *jl_method_lookup_by_type(jl_methtable_t *mt, jl_tupletype_t *types, - int cache, int inexact) + int cache, int inexact, int allow_exec) { jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->cache, types, NULL, 0, 1, jl_cachearg_offset(mt)); if (entry) @@ -1156,7 +1157,7 @@ jl_lambda_info_t *jl_method_lookup_by_type(jl_methtable_t *mt, jl_tupletype_t *t } if (jl_is_leaf_type((jl_value_t*)types)) cache = 1; - jl_lambda_info_t *sf = jl_mt_assoc_by_type(mt, types, cache, inexact); + jl_lambda_info_t *sf = jl_mt_assoc_by_type(mt, types, cache, inexact, allow_exec); if (cache) { JL_UNLOCK(&mt->writelock); } @@ -1170,7 +1171,7 @@ jl_lambda_info_t *jl_method_lookup_by_type(jl_methtable_t *mt, jl_tupletype_t *t JL_DLLEXPORT int jl_method_exists(jl_methtable_t *mt, jl_tupletype_t *types) { - return jl_method_lookup_by_type(mt, types, 0, 0) != NULL; + return jl_method_lookup_by_type(mt, types, 0, 0, 1) != NULL; } jl_lambda_info_t *jl_method_lookup(jl_methtable_t *mt, jl_value_t **args, size_t nargs, int cache) @@ -1188,7 +1189,7 @@ jl_lambda_info_t *jl_method_lookup(jl_methtable_t *mt, jl_value_t **args, size_t jl_tupletype_t *tt = arg_type_tuple(args, nargs); jl_lambda_info_t *sf = NULL; JL_GC_PUSH2(&tt, &sf); - sf = jl_mt_assoc_by_type(mt, tt, cache, 0); + sf = jl_mt_assoc_by_type(mt, tt, cache, 0, 1); if (cache) { JL_UNLOCK(&mt->writelock); } @@ -1229,7 +1230,7 @@ static jl_lambda_info_t *jl_get_unspecialized(jl_lambda_info_t *method) JL_GC_POP(); } if (def->needs_sparam_vals_ducttape) { - method->unspecialized_ducttape = jl_get_specialized(def, method->specTypes, method->sparam_vals); + method->unspecialized_ducttape = jl_get_specialized(def, method->specTypes, 
method->sparam_vals, 1); jl_gc_wb(method, method->unspecialized_ducttape); method->unspecialized_ducttape->unspecialized_ducttape = method->unspecialized_ducttape; } @@ -1347,7 +1348,7 @@ jl_lambda_info_t *jl_get_specialization1(jl_tupletype_t *types) // not be the case JL_GC_PUSH1(&sf); JL_TRY { - sf = jl_method_lookup_by_type(mt, types, 1, 1); + sf = jl_method_lookup_by_type(mt, types, 1, 1, 1); } JL_CATCH { sf = NULL; } @@ -1379,8 +1380,8 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) JL_DLLEXPORT jl_value_t *jl_get_spec_lambda(jl_tupletype_t *types) { - jl_value_t *li = (jl_value_t*)jl_get_specialization1(types); - return li ? li : jl_nothing; + jl_lambda_info_t *li = jl_get_specialization1(types); + return li ? (jl_value_t*)li : jl_nothing; } JL_DLLEXPORT int jl_has_call_ambiguities(jl_tupletype_t *types, jl_method_t *m) @@ -1909,7 +1910,7 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t **args, uint32_t nargs) JL_TIMING(METHOD_LOOKUP_SLOW); jl_tupletype_t *tt = arg_type_tuple(args, nargs); JL_GC_PUSH1(&tt); - mfunc = jl_mt_assoc_by_type(mt, tt, 1, 0); + mfunc = jl_mt_assoc_by_type(mt, tt, 1, 0, 1); JL_GC_POP(); } JL_UNLOCK(&mt->writelock); @@ -2000,7 +2001,7 @@ jl_value_t *jl_gf_invoke(jl_tupletype_t *types0, jl_value_t **args, size_t nargs if (func->invokes.unknown == NULL) func->invokes.unknown = jl_nothing; - mfunc = cache_method(mt, &func->invokes, entry->func.value, sig, tt, entry, tpenv); + mfunc = cache_method(mt, &func->invokes, entry->func.value, sig, tt, entry, tpenv, 1); } JL_UNLOCK(&method->writelock); } diff --git a/src/julia_internal.h b/src/julia_internal.h index d4e0f32e51058..618ac56bb1f86 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -309,7 +309,7 @@ int jl_is_toplevel_only_expr(jl_value_t *e); jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr); jl_lambda_info_t *jl_method_lookup_by_type(jl_methtable_t *mt, jl_tupletype_t *types, - int cache, int inexact); + int cache, int inexact, int 
allow_exec); jl_lambda_info_t *jl_method_lookup(jl_methtable_t *mt, jl_value_t **args, size_t nargs, int cache); jl_value_t *jl_gf_invoke(jl_tupletype_t *types, jl_value_t **args, size_t nargs); diff --git a/test/compile.jl b/test/compile.jl index 09dc51390edc6..d639ac6cf5e46 100644 --- a/test/compile.jl +++ b/test/compile.jl @@ -74,6 +74,9 @@ try (::Type{Vector{NominalValue{T, R}}}){T, R}() = 3 (::Type{Vector{NominalValue{T, T}}}){T}() = 4 (::Type{Vector{NominalValue{Int, Int}}})() = 5 + + #const some_method = @which Base.include("string") // FIXME: support for serializing a direct reference to an external Method not implemented + const some_linfo = @code_typed Base.include("string") end """) @test_throws ErrorException Core.kwfunc(Base.nothing) # make sure `nothing` didn't have a kwfunc (which would invalidate the attempted test) @@ -129,6 +132,8 @@ try Val{3}, Val{nothing}}, 0:25) + + @test Foo.some_linfo === @code_typed Base.include("string") end Baz_file = joinpath(dir, "Baz.jl")