From faab0def8a67d6164722e312d0984295c426d8eb Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 7 Feb 2022 05:19:24 -0600 Subject: [PATCH] Replace the `.ji` serialization with sysimage format This unifies two serializers, `dump.c` (used for packages) and `staticdata.c` (used for system images). It adopts the `staticdata` strategy, adding support for external linkage, uniquing of MethodInstances & types, method extensions, external specializations, and invalidation. This lays the groundwork for native code caching as done with system images. Co-authored-by: Valentin Churavy Co-authored-by: Jameson Nash Co-authored-by: Tim Holy --- base/compiler/typeinfer.jl | 8 +- base/loading.jl | 83 +- deps/llvm.mk | 2 +- src/Makefile | 7 +- src/aotcompile.cpp | 27 +- src/clangsa/GCChecker.cpp | 2 +- src/codegen-stubs.c | 4 +- src/codegen.cpp | 14 +- src/datatype.c | 2 +- src/dlload.c | 2 +- src/dump.c | 3577 --------------------------------- src/gc.c | 25 +- src/gf.c | 2 +- src/init.c | 6 +- src/ircode.c | 132 ++ src/jitlayers.h | 1 - src/jl_exported_funcs.inc | 7 +- src/julia.expmap | 1 + src/julia.h | 19 +- src/julia_internal.h | 62 +- src/llvm-multiversioning.cpp | 55 +- src/method.c | 2 +- src/module.c | 25 +- src/precompile.c | 118 +- src/processor.cpp | 9 +- src/processor.h | 1 + src/processor_arm.cpp | 23 + src/processor_fallback.cpp | 25 + src/processor_x86.cpp | 22 + src/rtutils.c | 6 + src/staticdata.c | 2317 +++++++++++++++------ src/staticdata_utils.c | 1264 ++++++++++++ src/subtype.c | 4 +- src/support/arraylist.h | 2 +- src/support/rle.h | 9 +- src/threading.c | 2 + stdlib/LLD_jll/src/LLD_jll.jl | 1 - stdlib/Profile/src/Allocs.jl | 6 +- test/precompile.jl | 29 +- 39 files changed, 3469 insertions(+), 4434 deletions(-) delete mode 100644 src/dump.c create mode 100644 src/staticdata_utils.c diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 1a13cc051944e2..340f47babcc810 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl 
@@ -1,8 +1,8 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Tracking of newly-inferred MethodInstances during precompilation +# Tracking of newly-inferred CodeInstances during precompilation const track_newly_inferred = RefValue{Bool}(false) -const newly_inferred = MethodInstance[] +const newly_inferred = CodeInstance[] # build (and start inferring) the inference frame for the top-level MethodInstance function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol) @@ -403,11 +403,11 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult) # TODO: also don't store inferred code if we've previously decided to interpret this function if !already_inferred inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result) - code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds) + code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds) if track_newly_inferred[] m = linfo.def if isa(m, Method) && m.module != Core - ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo) + push!(newly_inferred, ci) end end end diff --git a/base/loading.jl b/base/loading.jl index a5df7c24408ae2..1e168d8a29e62b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -898,7 +898,7 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} end @debug "Loading cache file $path for $pkg" - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods) + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false) if isa(sv, Exception) return sv end @@ -973,7 +973,7 @@ function run_package_callbacks(modkey::PkgId) end # loads a precompile cache file, after checking stale_cachefile tests -function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64) +function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) loaded = nothing if 
root_module_exists(modkey) @@ -1021,7 +1021,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St for i in 1:length(depmods) dep = depmods[i] dep isa Module && continue - _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt64} + _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} @assert root_module_exists(depkey) dep = root_module(depkey) depmods[i] = dep @@ -1052,7 +1052,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String) local depmodnames io = open(path, "r") try - isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") depmodnames = parse_cache_header(io)[3] isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.") finally @@ -1074,7 +1074,7 @@ end # returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it # returns the set of modules restored if the cache load succeeded -@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt64) +@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128) assert_havelock(require_lock) paths = find_all_in_cache_path(pkg) for path_to_try in paths::Vector{String} @@ -1087,7 +1087,7 @@ end for i in 1:length(staledeps) dep = staledeps[i] dep isa Module && continue - modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt64} + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} modpaths = find_all_in_cache_path(modkey) modfound = false for modpath_to_try in modpaths::Vector{String} @@ -1101,7 +1101,7 @@ end break end if !modfound - @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $modbuild_id is missing from the cache." 
+ @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache." staledeps = true break end @@ -1153,7 +1153,7 @@ const package_callbacks = Any[] const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: -const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them +const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) @@ -1406,7 +1406,7 @@ function _require(pkg::PkgId, env=nothing) # attempt to load the module file via the precompile cache locations if JLOptions().use_compiled_modules != 0 - m = _require_search_from_serialized(pkg, path, UInt64(0)) + m = _require_search_from_serialized(pkg, path, UInt128(0)) if m isa Module return m end @@ -1416,7 +1416,7 @@ function _require(pkg::PkgId, env=nothing) # but it was not handled by the precompile loader, complain for (concrete_pkg, concrete_build_id) in _concrete_dependencies if pkg == concrete_pkg - @warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache. + @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache. 
This may mean $pkg does not support precompilation but is imported by a module that does.""" if JLOptions().incremental != 0 # during incremental precompilation, this should be fail-fast @@ -1785,9 +1785,13 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio) p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout) if success(p) - # append checksum to the end of the .ji file: - open(tmppath, "a+") do f - write(f, _crc32c(seekstart(f))) + # append extra crc to the end of the .ji file: + open(tmppath, "r+") do f + if iszero(isvalid_cache_header(f)) + error("Invalid header for $pkg in new cache file $(repr(tmppath)).") + end + seekstart(f) + write(f, _crc32c(f)) end # inherit permission from the source file (and make them writable) chmod(tmppath, filemode(path) & 0o777 | 0o200) @@ -1807,7 +1811,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in end end - # this is atomic according to POSIX: + # this is atomic according to POSIX (not Win32): rename(tmppath, cachefile; force=true) return cachefile end @@ -1817,13 +1821,16 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in if p.exitcode == 125 return PrecompilableError() else - error("Failed to precompile $pkg to $tmppath.") + error("Failed to precompile $pkg to $(repr(tmppath)).") end end -module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m) +function module_build_id(m::Module) + hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) + return (UInt128(hi) << 64) | lo +end -isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios)) +isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32)) struct CacheHeaderIncludes @@ -1897,13 +1904,14 @@ function 
parse_cache_header(f::IO) totbytes -= 8 @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))" # read the list of modules that are required to be present during loading - required_modules = Vector{Pair{PkgId, UInt64}}() + required_modules = Vector{Pair{PkgId, UInt128}}() while true n = read(f, Int32) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - build_id = read(f, UInt64) # build id + build_id = UInt128(read(f, UInt64)) << 64 + build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) end return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash @@ -1912,17 +1920,17 @@ end function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) ret = parse_cache_header(io) srcfiles_only || return ret - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret + _, (includes, _), _, srctextpos, _... 
= ret srcfiles = srctext_files(io, srctextpos) delidx = Int[] for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles || push!(delidx, i) end deleteat!(includes, delidx) - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash + return ret finally close(io) end @@ -1930,11 +1938,11 @@ end -preferences_hash(f::IO) = parse_cache_header(f)[end] +preferences_hash(f::IO) = parse_cache_header(f)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end return preferences_hash(io) @@ -1945,14 +1953,14 @@ end function cache_dependencies(f::IO) - defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f) + _, (includes, _), modules, _... = parse_cache_header(f) return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) return cache_dependencies(io) finally close(io) @@ -1960,7 +1968,7 @@ function cache_dependencies(cachefile::String) end function read_dependency_src(io::IO, filename::AbstractString) - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io) + srctextpos = parse_cache_header(io)[4] srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) return _read_dependency_src(io, filename) @@ -1983,7 +1991,7 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && 
throw(ArgumentError("Invalid header in cache file $cachefile.")) return read_dependency_src(io, filename) finally close(io) @@ -2173,12 +2181,13 @@ get_compiletime_preferences(::Nothing) = String[] # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) - return stale_cachefile(PkgId(""), UInt64(0), modpath, cachefile; ignore_loaded) + return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded) end -@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt64, modpath::String, cachefile::String; ignore_loaded::Bool = false) +@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + checksum = isvalid_cache_header(io) + if iszero(checksum) @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end @@ -2191,9 +2200,12 @@ end @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead" return true end - if build_id != UInt64(0) && id.second != build_id - @debug "Ignoring cache file $cachefile for $modkey since it is does not provide desired build_id" - return true + if build_id != UInt128(0) + id_build = (UInt128(checksum) << 64) | id.second + if id_build != build_id + @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))" + return true + end end id = id.first modules = Dict{PkgId, UInt64}(modules) @@ -2233,11 +2245,12 @@ end for (req_key, req_build_id) in _concrete_dependencies build_id = get(modules, req_key, UInt64(0)) if build_id !== UInt64(0) + build_id |= UInt128(checksum) << 64 if build_id === req_build_id 
skip_timecheck = true break end - @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $build_id) for $req_key (want $req_build_id)" + @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" return true # cachefile doesn't provide the required version of the dependency end end diff --git a/deps/llvm.mk b/deps/llvm.mk index c13551ee331efc..78d037ec126d09 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -308,8 +308,8 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ) endif $(eval $(call bb-install,llvm,LLVM,false,true)) -$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,lld,LLD,false,true)) +$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true)) endif # USE_BINARYBUILDER_LLVM diff --git a/src/Makefile b/src/Makefile index 886a0a546ff3ac..8ac15083a73e29 100644 --- a/src/Makefile +++ b/src/Makefile @@ -42,7 +42,7 @@ endif SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ - dlload sys init task array dump staticdata toplevel jl_uv datatype \ + dlload sys init task array staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions \ threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ @@ -291,7 +291,6 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h -$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc 
common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h @@ -317,7 +316,7 @@ $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h) $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) -$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h @@ -453,7 +452,7 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult" SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core" # these need to be annotated (and possibly fixed) -SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp +SKIP_IMPLICIT_ATOMICS := module.c staticdata.c codegen.cpp # these need to be annotated (and possibly fixed) SKIP_GC_CHECK := codegen.cpp rtutils.c diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 83e1c6d150430e..26ba66fa967371 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -92,7 +92,7 @@ typedef struct { std::vector jl_sysimg_fvars; std::vector 
jl_sysimg_gvars; std::map> jl_fvar_map; - std::map jl_value_to_llvm; // uses 1-based indexing + std::vector jl_value_to_llvm; } jl_native_code_desc_t; extern "C" JL_DLLEXPORT @@ -110,17 +110,12 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, } extern "C" JL_DLLEXPORT -int32_t jl_get_llvm_gv_impl(void *native_code, jl_value_t *p) +void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) { - // map a jl_value_t memory location to a GlobalVariable + // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - if (data) { - auto it = data->jl_value_to_llvm.find(p); - if (it != data->jl_value_to_llvm.end()) { - return it->second; - } - } - return 0; + arraylist_grow(gvs, data->jl_value_to_llvm.size()); + memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*)); } extern "C" JL_DLLEXPORT @@ -148,7 +143,6 @@ static void emit_offset_table(Module &mod, const std::vector &vars { // Emit a global variable with all the variable addresses. // The cloning pass will convert them into offsets. - assert(!vars.empty()); size_t nvars = vars.size(); std::vector addrs(nvars); for (size_t i = 0; i < nvars; i++) { @@ -258,9 +252,9 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing // all reachable & inferrrable functions. The `policy` flag switches between the default -// mode `0`, the extern mode `1`, and imaging mode `2`. +// mode `0`, the extern mode `1`. 
extern "C" JL_DLLEXPORT -void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy) +void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) { ++CreateNativeCalls; CreateNativeMax.updateMax(jl_array_len(methods)); @@ -268,7 +262,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm cgparams = &jl_default_cgparams; jl_native_code_desc_t *data = new jl_native_code_desc_t; CompilationPolicy policy = (CompilationPolicy) _policy; - bool imaging = imaging_default() || policy == CompilationPolicy::ImagingMode; + bool imaging = imaging_default() || _imaging_mode == 1; jl_workqueue_t emitted; jl_method_instance_t *mi = NULL; jl_code_info_t *src = NULL; @@ -342,10 +336,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // process the globals array, before jl_merge_module destroys them std::vector gvars; + data->jl_value_to_llvm.resize(params.globals.size()); for (auto &global : params.globals) { + data->jl_value_to_llvm.at(gvars.size()) = global.first; gvars.push_back(std::string(global.second->getName())); - data->jl_value_to_llvm[global.first] = gvars.size(); } CreateNativeMethods += emitted.size(); @@ -575,7 +570,7 @@ void jl_dump_native_impl(void *native_code, Type *T_psize = T_size->getPointerTo(); // add metadata information - if (imaging_default()) { + if (imaging_default() || jl_options.outputo) { emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize); emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize); diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 34821d6bac9cb5..513e6db606eb89 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -1332,7 +1332,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } else if (name == 
"JL_GC_PUSH1" || name == "JL_GC_PUSH2" || name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" || name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" || - name == "JL_GC_PUSH7") { + name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8") { ProgramStateRef State = C.getState(); // Transform slots to roots, transform values to rooted unsigned NumArgs = CE->getNumArgs(); diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1f209f36291a2f..01324e349f08f6 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -13,7 +13,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len) UNAVAILABLE -JL_DLLEXPORT int32_t jl_get_llvm_gv_fallback(void *native_code, jl_value_t *p) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, @@ -66,7 +66,7 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) return 0; } -JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmctxt, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE +JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) UNAVAILABLE JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { diff --git a/src/codegen.cpp b/src/codegen.cpp index f02815df37e733..ce8f8f4adf48c7 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2222,7 +2222,8 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) { - assert(!ctx.emission_context.imaging); + if 
(ctx.emission_context.imaging) + return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; visitLine(ctx, jl_coverage_data_pointer(filename, line), ConstantInt::get(getInt64Ty(ctx.builder.getContext()), 1), "lcnt"); @@ -2232,7 +2233,8 @@ static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync) { - assert(!ctx.emission_context.imaging); + if (ctx.emission_context.imaging) + return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; Value *addend = sync @@ -4020,6 +4022,8 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const std::string name; StringRef protoname; bool need_to_emit = true; + // TODO: We should check if the code is available externally + // and then emit a trampoline. if (ctx.use_cache) { // optimization: emit the correct name immediately, if we know it // TODO: use `emitted` map here too to try to consolidate names? 
@@ -6785,7 +6789,7 @@ static jl_llvm_functions_t }(); std::string wrapName; - raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++; + raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++; declarations.functionObject = wrapName; (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) @@ -8260,6 +8264,10 @@ void jl_compile_workqueue( StringRef preal_decl = ""; bool preal_specsig = false; auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + // TODO: available_extern + // We need to emit a trampoline that loads the target address in an extern_module from a GV + // Right now we will unnecessarily emit a function we have already compiled in a native module + // again in a calling module. if (params.cache && invoke != NULL) { auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (invoke == jl_fptr_args_addr) { diff --git a/src/datatype.c b/src/datatype.c index 9d22685473e075..b225ff3bd4fe24 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -72,7 +72,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_atomic_store_relaxed(&tn->cache, jl_emptysvec); jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec); tn->names = NULL; - tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da); + tn->hash = bitmix(bitmix(module ? module->build_id.lo : 0, name->hash), 0xa1ada1da); tn->_reserved = 0; tn->abstract = abstract; tn->mutabl = mutabl; diff --git a/src/dlload.c b/src/dlload.c index 57310c18b0e464..dd5d75da31a34e 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -73,7 +73,7 @@ const char *jl_crtdll_name = CRTDLL_BASENAME ".dll"; #define JL_RTLD(flags, FLAG) (flags & JL_RTLD_ ## FLAG ? 
RTLD_ ## FLAG : 0) #ifdef _OS_WINDOWS_ -static void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT { DWORD res; LPWSTR errmsg; diff --git a/src/dump.c b/src/dump.c deleted file mode 100644 index 96c875c4ec7f59..00000000000000 --- a/src/dump.c +++ /dev/null @@ -1,3577 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -/* - saving and restoring precompiled modules (.ji files) -*/ -#include -#include - -#include "julia.h" -#include "julia_internal.h" -#include "julia_gcext.h" -#include "builtin_proto.h" -#include "serialize.h" - -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" -#include "julia_assert.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// This file, together with ircode.c, allows (de)serialization between -// modules and *.ji cache files. `jl_save_incremental` gets called as the final step -// during package precompilation, and `_jl_restore_incremental` by `using SomePkg` -// whenever `SomePkg` has not yet been loaded. - -// Types, methods, and method instances form a graph that may have cycles, so -// serialization has to break these cycles. This is handled via "backreferences," -// referring to already (de)serialized items by an index. It is critial to ensure -// that the indexes of these backreferences align precisely during serialization -// and deserialization, to ensure that these integer indexes mean the same thing -// under both circumstances. Consequently, if you are modifying this file, be -// careful to match the sequence, if necessary reserving space for something that will -// be updated later. - -// It is also necessary to save & restore references to externally-defined -// objects, e.g., for package methods that call methods defined in Base or -// elsewhere. 
Consequently during deserialization there's a distinction between -// "reference" types, methods, and method instances (essentially like a -// GlobalRef), and "recached" version that refer to the actual entity in the -// running session. As a concrete example, types have a module in which they are -// defined, but once defined those types can be used by any dependent package. -// We don't store the full type definition again in that dependent package, we -// just encode a reference to that type. In the running session, such references -// are merely pointers to the type-cache, but the specific address is obviously -// not likely to be reproducible across sessions (it will differ between the -// session in which you precompile and the session in which you're using the -// package). Hence, during serialization we recode them as "verbose" references -// (that follow Julia syntax to allow them to be reconstructed), but on -// deserialization we have to replace those verbose references with the -// appropriate pointer in the user's running session. We complete -// deserialization before beginning the process of recaching, because we need -// the backreferences during deserialization and the actual objects during -// recaching. - -// Finally, because our backedge graph is not bidirectional, special handling is -// required to identify backedges from external methods that call internal methods. -// These get set aside and restored at the end of deserialization. - -// In broad terms, the major steps in serialization are: -// - starting from a "worklist" of modules, write the header. This stores things -// like the Julia build this was precompiled for, the package dependencies, -// the list of include files, file modification times, etc. -// - gather the collection of items to be written to this precompile file. 
This -// includes accessible from the module's binding table (if they are owned by a -// worklist module), but also includes things like methods added to external -// functions, instances of external methods that were newly type-inferred -// while precompiling a worklist module, and backedges of callees that were -// called by methods in this package. By and large, these latter items are not -// referenced by the module(s) in the package, and so these have to be -// extracted by traversing the entire system searching for things that do link -// back to a module in the worklist. -// - serialize all the items. The first time we encounter an item, we serialized -// it, and on future references (pointers) to that item we replace them with -// with a backreference. `jl_serialize_*` functions handle this work. -// - write source text for the files that defined the package. This is primarily -// to support Revise.jl. - -// Deserialization is the mirror image of serialization, but in some ways is -// trickier: -// - we have to merge items into the running session (recaching as described -// above) and handle cases like having two dependent packages caching the same -// MethodInstance of a dependency -// - we have to check for invalidation---the user might have loaded other -// packages that define methods that supersede some of the dispatches chosen -// when the package was precompiled, or this package might define methods that -// supersede dispatches for previously-loaded packages. These two -// possibilities are checked during backedge and method insertion, -// respectively. -// Both of these mean that deserialization requires one to look up a lot of -// things in the running session; for example, for invalidation checks we have -// to do type-intersection between signatures used for MethodInstances and the -// current session's full MethodTable. 
In practice, such steps dominate package -// loading time (it has very little to do with I/O or deserialization -// performance). Paradoxically, sometimes storing more code in a package can -// lead to faster performance: references to things in the same .ji file can be -// precomputed, but external references have to be looked up. You can see this -// effect in the benchmarks for #43990, where storing external MethodInstances -// and CodeInstances (more code than was stored previously) actually decreased -// load times for many packages. - -// Note that one should prioritize deserialization performance over serialization performance, -// since deserialization may be performed much more often than serialization. -// Certain items are preprocessed during serialization to save work when they are -// later deserialized. - - -// TODO: put WeakRefs on the weak_refs list during deserialization -// TODO: handle finalizers - -// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) -static htable_t ser_tag; -// tag => type mapping, the reverse of ser_tag -static jl_value_t *deser_tag[256]; -// hash of some common symbols, encoded as CommonSym_tag plus 1 byte -static htable_t common_symbol_tag; -static jl_value_t *deser_symbols[256]; - -// table of all objects that have been deserialized, indexed by pos -// (the order in the serializer stream). the low -// bit is reserved for flagging certain entries and pos is -// left shift by 1 -static htable_t backref_table; // pos = backref_table[obj] -static int backref_table_numel; -static arraylist_t backref_list; // obj = backref_list[pos] - -// set of all CodeInstances yet to be (in)validated -static htable_t new_code_instance_validate; - -// list of (jl_value_t **loc, size_t pos) entries -// for anything that was flagged by the deserializer for later -// type-rewriting of some sort. pos is the index in backref_list. 
-static arraylist_t flagref_list; -// ref => value hash for looking up the "real" entity from -// the deserialized ref. Used for entities that must be unique, -// like types, methods, and method instances -static htable_t uniquing_table; - -// list of (size_t pos, itemkey) entries -// for the serializer to mark values in need of rework -// during deserialization later -// This includes items that need rehashing (IdDict, TypeMapLevels) -// and modules. -static arraylist_t reinit_list; - -// list of modules being serialized -// This is not quite globally rooted, but we take care to only -// ever assigned rooted values here. -static jl_array_t *serializer_worklist JL_GLOBALLY_ROOTED; -// The set of external MethodInstances we want to serialize -// (methods owned by other modules that were first inferred for a -// module currently being serialized) -static htable_t external_mis; -// Inference tracks newly-inferred MethodInstances during precompilation -// and registers them by calling jl_set_newly_inferred -static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED; -// Mutex for newly_inferred -static jl_mutex_t newly_inferred_mutex; - -// New roots to add to Methods. These can't be added until after -// recaching is complete, so we have to hold on to them separately -// Stored as method => (worklist_key, newroots) -// The worklist_key is the uuid of the module that triggered addition -// of `newroots`. This is needed because CodeInstances reference -// their roots by "index", and we use a bipartite index -// (module_uuid, integer_index) to make indexes "relocatable" -// (meaning that users can load modules in different orders and -// so the absolute integer index of a root is not reproducible). -// See the "root blocks" section of method.c for more detail. 
-static htable_t queued_method_roots; - -// inverse of backedges graph (caller=>callees hash) -jl_array_t *edges_map JL_GLOBALLY_ROOTED; // rooted for the duration of our uses of this - -// list of requested ccallable signatures -static arraylist_t ccallable_list; - -typedef struct { - ios_t *s; - jl_ptls_t ptls; - jl_array_t *loaded_modules_array; -} jl_serializer_state; - -static jl_value_t *jl_idtable_type = NULL; -static jl_typename_t *jl_idtable_typename = NULL; -static jl_value_t *jl_bigint_type = NULL; -static int gmp_limb_size = 0; - -static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT -{ - write_uint64(s, *((uint64_t*)&x)); -} - -void *jl_lookup_ser_tag(jl_value_t *v) -{ - return ptrhash_get(&ser_tag, v); -} - -void *jl_lookup_common_symbol(jl_value_t *v) -{ - return ptrhash_get(&common_symbol_tag, v); -} - -jl_value_t *jl_deser_tag(uint8_t tag) -{ - return deser_tag[tag]; -} - -jl_value_t *jl_deser_symbol(uint8_t tag) -{ - return deser_symbols[tag]; -} - -uint64_t jl_worklist_key(jl_array_t *worklist) -{ - assert(jl_is_array(worklist)); - size_t len = jl_array_len(worklist); - if (len > 0) { - jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); - assert(jl_is_module(topmod)); - return topmod->build_id; - } - return 0; -} - -// --- serialize --- - -#define jl_serialize_value(s, v) jl_serialize_value_((s), (jl_value_t*)(v), 0) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; - -static void jl_serialize_cnull(jl_serializer_state *s, jl_value_t *t) -{ - backref_table_numel++; - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, t); -} - -static int module_in_worklist(jl_module_t *mod) JL_NOTSAFEPOINT -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (jl_is_module(workmod) && jl_is_submodule(mod, workmod)) - return 1; - } - return 0; -} - -static int 
method_instance_in_queue(jl_method_instance_t *mi) -{ - return ptrhash_get(&external_mis, mi) != HT_NOTFOUND; -} - -// compute whether a type references something internal to worklist -// and thus could not have existed before deserialize -// and thus does not need delayed unique-ing -static int type_in_worklist(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (module_in_worklist(dt->name->module)) - return 1; - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - jl_value_t *p = jl_unwrap_unionall(jl_tparam(dt, i)); - // TODO: what about Union and TypeVar?? - if (type_in_worklist((jl_datatype_t*)(jl_is_datatype(p) ? p : jl_typeof(p)))) - return 1; - } - return 0; -} - -static int type_recursively_external(jl_datatype_t *dt); - -static int type_parameter_recursively_external(jl_value_t *p0) JL_NOTSAFEPOINT -{ - if (!jl_is_concrete_type(p0)) - return 0; - jl_datatype_t *p = (jl_datatype_t*)p0; - //while (jl_is_unionall(p)) { - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->lb)) - // return 0; - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->ub)) - // return 0; - // p = (jl_datatype_t*)((jl_unionall_t*)p)->body; - //} - if (module_in_worklist(p->name->module)) - return 0; - if (p->name->wrapper != (jl_value_t*)p0) { - if (!type_recursively_external(p)) - return 0; - } - return 1; -} - -// returns true if all of the parameters are tag 6 or 7 -static int type_recursively_external(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (!dt->isconcretetype) - return 0; - if (jl_svec_len(dt->parameters) == 0) - return 1; - - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - if (!type_parameter_recursively_external(jl_tparam(dt, i))) - return 0; - } - return 1; -} - -static void mark_backedges_in_worklist(jl_method_instance_t *mi, htable_t *visited, int found) -{ - int oldfound = (char*)ptrhash_get(visited, mi) - (char*)HT_NOTFOUND; - if (oldfound < 3) - return; // not in-progress - ptrhash_put(visited, mi, 
(void*)((char*)HT_NOTFOUND + 1 + found)); -#ifndef NDEBUG - jl_module_t *mod = mi->def.module; - if (jl_is_method(mod)) - mod = ((jl_method_t*)mod)->module; - assert(jl_is_module(mod)); - assert(!mi->precompiled && !module_in_worklist(mod)); - assert(mi->backedges); -#endif - size_t i = 0, n = jl_array_len(mi->backedges); - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - mark_backedges_in_worklist(be, visited, found); - } -} - -// When we infer external method instances, ensure they link back to the -// package. Otherwise they might be, e.g., for external macros -static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, int depth) -{ - jl_module_t *mod = mi->def.module; - if (jl_is_method(mod)) - mod = ((jl_method_t*)mod)->module; - assert(jl_is_module(mod)); - if (mi->precompiled || module_in_worklist(mod)) { - return 1; - } - if (!mi->backedges) { - return 0; - } - void **bp = ptrhash_bp(visited, mi); - // HT_NOTFOUND: not yet analyzed - // HT_NOTFOUND + 1: no link back - // HT_NOTFOUND + 2: does link back - // HT_NOTFOUND + 3 + depth: in-progress - int found = (char*)*bp - (char*)HT_NOTFOUND; - if (found) - return found - 1; - *bp = (void*)((char*)HT_NOTFOUND + 3 + depth); // preliminarily mark as in-progress - size_t i = 0, n = jl_array_len(mi->backedges); - int cycle = 0; - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - int child_found = has_backedge_to_worklist(be, visited, depth + 1); - if (child_found == 1) { - found = 1; - break; - } - else if (child_found >= 2 && child_found - 2 < cycle) { - // record the cycle will resolve at depth "cycle" - cycle = child_found - 2; - assert(cycle); - } - } - if (!found && cycle && cycle != depth) - return cycle + 2; - bp = ptrhash_bp(visited, mi); // re-acquire since rehashing might change the location - *bp = (void*)((char*)HT_NOTFOUND + 1 + found); - if (cycle) { - // If we are the top of the current 
cycle, now mark all other parts of - // our cycle by re-walking the backedges graph and marking all WIP - // items as found. - // Be careful to only re-walk as far as we had originally scanned above. - // Or if we found a backedge, also mark all of the other parts of the - // cycle as also having an backedge. - n = i; - i = 0; - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - mark_backedges_in_worklist(be, visited, found); - } - } - return found; -} - -// given the list of MethodInstances that were inferred during the -// build, select those that are external and have at least one -// relocatable CodeInstance and are inferred to be called from the worklist -// or explicitly added by a precompile statement. -// Also prepares external_mis for method_instance_in_queue queries. -static jl_array_t *queue_external_mis(jl_array_t *list) -{ - if (list == NULL) - return NULL; - size_t i, n = 0; - htable_t visited; - assert(jl_is_array(list)); - size_t n0 = jl_array_len(list); - htable_new(&visited, n0); - for (i = 0; i < n0; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method_instance(mi)); - if (jl_is_method(mi->def.value)) { - jl_method_t *m = mi->def.method; - if (!module_in_worklist(m->module)) { - jl_code_instance_t *ci = mi->cache; - while (ci) { - if (ci->max_world == ~(size_t)0 && ci->relocatability && ci->inferred) - break; - ci = jl_atomic_load_relaxed(&ci->next); - } - if (ci && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) { - int found = has_backedge_to_worklist(mi, &visited, 1); - assert(found == 0 || found == 1); - if (found == 1) { - ptrhash_put(&external_mis, mi, ci); - n++; - } - } - } - } - } - htable_free(&visited); - if (n == 0) - return NULL; - jl_array_t *mi_list = jl_alloc_vec_any(n); - n = 0; - for (size_t i = 0; i < external_mis.size; i += 2) { - void *ci = external_mis.table[i+1]; - if (ci != HT_NOTFOUND) { - jl_array_ptr_set(mi_list, n++, 
(jl_value_t*)ci); - } - } - assert(n == jl_array_len(mi_list)); - return mi_list; -} - -static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_GC_DISABLED -{ - int tag = 0; - int internal = module_in_worklist(dt->name->module); - if (!internal && jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) { - tag = 6; // external primary type - } - else if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) { - tag = 0; // normal struct - } - else if (internal) { - if (jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) // comes up often since functions create types - tag = 5; // internal, and not in the typename cache - else - tag = 10; // anything else that's internal (just may need recaching) - } - else if (type_recursively_external(dt)) { - tag = 7; // external type that can be immediately recreated (with apply_type) - } - else if (type_in_worklist(dt)) { - tag = 11; // external, but definitely new (still needs caching, but not full unique-ing) - } - else { - // this is eligible for (and possibly requires) unique-ing later, - // so flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, dt); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - tag = 12; - } - - write_uint8(s->s, TAG_DATATYPE); - write_uint8(s->s, tag); - if (tag == 6 || tag == 7) { - // for tag==6, copy its typevars in case there are references to them elsewhere - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - return; - } - - int has_instance = (dt->instance != NULL); - int has_layout = (dt->layout != NULL); - write_uint8(s->s, has_layout | (has_instance << 1)); - write_uint8(s->s, dt->hasfreetypevars - | (dt->isconcretetype << 1) - | (dt->isdispatchtuple << 2) - | (dt->isbitstype << 3) - | (dt->zeroinit << 4) - | (dt->has_concrete_subtype << 5) - | (dt->cached_by_hash << 6) - | (dt->isprimitivetype << 7)); - write_int32(s->s, dt->hash); - - if (has_layout) { - uint8_t layout = 
0; - if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout) { - layout = 1; - } - else if (dt->layout == jl_nothing_type->layout) { - layout = 2; - } - else if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout) { - layout = 3; - } - write_uint8(s->s, layout); - if (layout == 0) { - uint32_t nf = dt->layout->nfields; - uint32_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); - ios_write(s->s, (const char*)dt->layout, sizeof(*dt->layout)); - size_t fldsize = nf * fieldsize; - if (dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; - ios_write(s->s, (const char*)(dt->layout + 1), fldsize); - } - } - - if (has_instance) - jl_serialize_value(s, dt->instance); - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - jl_serialize_value(s, dt->super); - jl_serialize_value(s, dt->types); -} - -static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) -{ - write_uint8(s->s, TAG_MODULE); - jl_serialize_value(s, m->name); - size_t i; - if (!module_in_worklist(m)) { - if (m == m->parent) { - // top-level module - write_int8(s->s, 2); - int j = 0; - for (i = 0; i < jl_array_len(s->loaded_modules_array); i++) { - jl_module_t *mi = (jl_module_t*)jl_array_ptr_ref(s->loaded_modules_array, i); - if (!module_in_worklist(mi)) { - if (m == mi) { - write_int32(s->s, j); - return; - } - j++; - } - } - assert(0 && "top level module not found in modules array"); - } - else { - write_int8(s->s, 1); - jl_serialize_value(s, m->parent); - } - return; - } - write_int8(s->s, 0); - jl_serialize_value(s, m->parent); - void **table = m->bindings.table; - for (i = 0; i < m->bindings.size; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); - jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); - jl_value_t *e = jl_atomic_load_relaxed(&b->value); - if 
(!b->constp && e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); - write_int8(s->s, (b->deprecated<<3) | (b->constp<<2) | (b->exportp<<1) | (b->imported)); - } - } - jl_serialize_value(s, NULL); - write_int32(s->s, m->usings.len); - for(i=0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); - } - write_uint8(s->s, m->istopmod); - write_uint64(s->s, m->uuid.hi); - write_uint64(s->s, m->uuid.lo); - write_uint64(s->s, m->build_id); - write_int32(s->s, m->counter); - write_int32(s->s, m->nospecialize); - write_uint8(s->s, m->optlevel); - write_uint8(s->s, m->compile); - write_uint8(s->s, m->infer); - write_uint8(s->s, m->max_methods); -} - -static int jl_serialize_generic(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - if (v == NULL) { - write_uint8(s->s, TAG_NULL); - return 1; - } - - void *tag = ptrhash_get(&ser_tag, v); - if (tag != HT_NOTFOUND) { - uint8_t t8 = (intptr_t)tag; - if (t8 <= LAST_TAG) - write_uint8(s->s, 0); - write_uint8(s->s, t8); - return 1; - } - - if (jl_is_symbol(v)) { - void *idx = ptrhash_get(&common_symbol_tag, v); - if (idx != HT_NOTFOUND) { - write_uint8(s->s, TAG_COMMONSYM); - write_uint8(s->s, (uint8_t)(size_t)idx); - return 1; - } - } - else if (v == (jl_value_t*)jl_core_module) { - write_uint8(s->s, TAG_CORE); - return 1; - } - else if (v == (jl_value_t*)jl_base_module) { - write_uint8(s->s, TAG_BASE); - return 1; - } - - if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) { - jl_serialize_value(s, jl_an_empty_string); - return 1; - } - else if (!jl_is_uint8(v)) { - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - 
uintptr_t pos = (char*)*bp - (char*)HT_NOTFOUND - 1; - if (pos < 65536) { - write_uint8(s->s, TAG_SHORT_BACKREF); - write_uint16(s->s, pos); - } - else { - write_uint8(s->s, TAG_BACKREF); - write_int32(s->s, pos); - } - return 1; - } - intptr_t pos = backref_table_numel++; - if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { - // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)1); - } - if (jl_is_module(v)) { - jl_module_t *m = (jl_module_t*)v; - if (module_in_worklist(m) && !module_in_worklist(m->parent)) { - // will need to reinsert this into parent bindings, later (in case of any errors during reinsert) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)2); - } - } - // TypeMapLevels need to be rehashed - if (jl_is_mtable(v)) { - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)3); - } - pos <<= 1; - ptrhash_put(&backref_table, v, (char*)HT_NOTFOUND + pos + 1); - } - - return 0; -} - -static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_t *codeinst, - int skip_partial_opaque, int force) JL_GC_DISABLED -{ - if (!force && jl_serialize_generic(s, (jl_value_t*)codeinst)) { - return; - } - assert(codeinst != NULL); // handle by jl_serialize_generic, but this makes clang-sa happy - - int validate = 0; - if (codeinst->max_world == ~(size_t)0 && codeinst->inferred) - // TODO: also check if this object is part of the codeinst cache and in edges_map - validate = 1; // can check on deserialize if this cache entry is still valid - int flags = validate << 0; - if (codeinst->invoke == jl_fptr_const_return) - flags |= 1 << 2; - if (codeinst->precompile) - flags |= 1 << 3; - - // CodeInstances with PartialOpaque return type are currently not allowed - // to be cached. We skip them in serialization here, forcing them to - // be re-infered on reload. 
- int write_ret_type = validate || codeinst->min_world == 0; - if (write_ret_type && codeinst->rettype_const && - jl_typeis(codeinst->rettype_const, jl_partial_opaque_type)) { - if (skip_partial_opaque) { - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0); - return; - } - else { - jl_error("Cannot serialize CodeInstance with PartialOpaque rettype"); - } - } - - write_uint8(s->s, TAG_CODE_INSTANCE); - write_uint8(s->s, flags); - write_uint32(s->s, codeinst->ipo_purity_bits); - write_uint32(s->s, jl_atomic_load_relaxed(&codeinst->purity_bits)); - jl_serialize_value(s, (jl_value_t*)codeinst->def); - if (write_ret_type) { - jl_serialize_value(s, jl_atomic_load_relaxed(&codeinst->inferred)); - jl_serialize_value(s, codeinst->rettype_const); - jl_serialize_value(s, codeinst->rettype); - jl_serialize_value(s, codeinst->argescapes); - } - else { - // skip storing useless data - jl_serialize_value(s, NULL); - jl_serialize_value(s, NULL); - jl_serialize_value(s, jl_any_type); - jl_serialize_value(s, jl_nothing); - } - write_uint8(s->s, codeinst->relocatability); - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0); -} - -enum METHOD_SERIALIZATION_MODE { - METHOD_INTERNAL = 1, - METHOD_EXTERNAL_MT = 2, - METHOD_HAS_NEW_ROOTS = 4, -}; - -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED -{ - if (jl_serialize_generic(s, v)) { - return; - } - - size_t i; - if (jl_is_svec(v)) { - size_t l = jl_svec_len(v); - if (l <= 255) { - write_uint8(s->s, TAG_SVEC); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SVEC); - write_int32(s->s, l); - } - for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_svecref(v, i)); - } - } - else if (jl_is_symbol(v)) { - size_t l = strlen(jl_symbol_name((jl_sym_t*)v)); - if (l <= 255) { - write_uint8(s->s, TAG_SYMBOL); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SYMBOL); - write_int32(s->s, l); - } - 
ios_write(s->s, jl_symbol_name((jl_sym_t*)v), l); - } - else if (jl_is_array(v)) { - jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(ar)); - int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { - write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); - } - for (i = 0; i < ar->flags.ndims; i++) - jl_serialize_value(s, jl_box_long(jl_array_dim(ar,i))); - jl_serialize_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); - if (ar->flags.ptrarray) { - for (i = 0; i < l; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - if (e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr elements to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - } - } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - for (i = 0; i < l; i++) { - const char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j); - const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; - if ((const char*)fld != start) - ios_write(s->s, start, (const char*)fld - start); - JL_GC_PROMISE_ROOTED(*fld); - jl_serialize_value(s, *fld); - start = (const char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_write(s->s, start, data - start); - } - } - else if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL - const void **data = (const void**)jl_array_data(ar); - for (i = 0; i < l; i++) { - const void *e = data[i]; - if (e != (void*)-1) - e = 
NULL; - ios_write(s->s, (const char*)&e, sizeof(e)); - } - } - else { - ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize); - if (jl_array_isbitsunion(ar)) - ios_write(s->s, jl_array_typetagdata(ar), l); - } - } - else if (jl_is_datatype(v)) { - jl_serialize_datatype(s, (jl_datatype_t*)v); - } - else if (jl_is_unionall(v)) { - write_uint8(s->s, TAG_UNIONALL); - jl_datatype_t *d = (jl_datatype_t*)jl_unwrap_unionall(v); - if (jl_is_datatype(d) && d->name->wrapper == v && - !module_in_worklist(d->name->module)) { - write_uint8(s->s, 1); - jl_serialize_value(s, d->name->module); - jl_serialize_value(s, d->name->name); - } - else { - write_uint8(s->s, 0); - jl_serialize_value(s, ((jl_unionall_t*)v)->var); - jl_serialize_value(s, ((jl_unionall_t*)v)->body); - } - } - else if (jl_is_typevar(v)) { - write_uint8(s->s, TAG_TVAR); - jl_serialize_value(s, ((jl_tvar_t*)v)->name); - jl_serialize_value(s, ((jl_tvar_t*)v)->lb); - jl_serialize_value(s, ((jl_tvar_t*)v)->ub); - } - else if (jl_is_method(v)) { - write_uint8(s->s, TAG_METHOD); - jl_method_t *m = (jl_method_t*)v; - uint64_t key = 0; - int serialization_mode = 0, nwithkey = 0; - if (m->is_for_opaque_closure || module_in_worklist(m->module)) - serialization_mode |= METHOD_INTERNAL; - if (!(serialization_mode & METHOD_INTERNAL)) { - key = jl_worklist_key(serializer_worklist); - nwithkey = nroots_with_key(m, key); - if (nwithkey > 0) - serialization_mode |= METHOD_HAS_NEW_ROOTS; - } - if (!(serialization_mode & METHOD_INTERNAL)) { - // flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - jl_serialize_value(s, (jl_value_t*)m->sig); - jl_serialize_value(s, (jl_value_t*)m->module); - if (m->external_mt != NULL) { - assert(jl_typeis(m->external_mt, jl_methtable_type)); - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - if (!module_in_worklist(mt->module)) { - serialization_mode |= METHOD_EXTERNAL_MT; - } - 
} - write_uint8(s->s, serialization_mode); - if (serialization_mode & METHOD_EXTERNAL_MT) { - // We reference this method table by module and binding - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - jl_serialize_value(s, mt->module); - jl_serialize_value(s, mt->name); - } - else { - jl_serialize_value(s, (jl_value_t*)m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - // Serialize the roots that belong to key - write_uint64(s->s, key); - write_int32(s->s, nwithkey); - rle_iter_state rootiter = rle_iter_init(0); - uint64_t *rletable = NULL; - size_t nblocks2 = 0, nroots = jl_array_len(m->roots); - if (m->root_blocks) { - rletable = (uint64_t*)jl_array_data(m->root_blocks); - nblocks2 = jl_array_len(m->root_blocks); - } - // this visits every item, if it becomes a bottleneck we could hop blocks - while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) - if (rootiter.key == key) - jl_serialize_value(s, jl_array_ptr_ref(m->roots, rootiter.i)); - } - return; - } - jl_serialize_value(s, m->specializations); - jl_serialize_value(s, jl_atomic_load_relaxed(&m->speckeyset)); - jl_serialize_value(s, (jl_value_t*)m->name); - jl_serialize_value(s, (jl_value_t*)m->file); - write_int32(s->s, m->line); - write_int32(s->s, m->called); - write_int32(s->s, m->nargs); - write_int32(s->s, m->nospecialize); - write_int32(s->s, m->nkw); - write_int8(s->s, m->isva); - write_int8(s->s, m->pure); - write_int8(s->s, m->is_for_opaque_closure); - write_int8(s->s, m->constprop); - write_uint8(s->s, m->purity.bits); - jl_serialize_value(s, (jl_value_t*)m->slot_syms); - jl_serialize_value(s, (jl_value_t*)m->roots); - jl_serialize_value(s, (jl_value_t*)m->root_blocks); - write_int32(s->s, m->nroots_sysimg); - jl_serialize_value(s, (jl_value_t*)m->ccallable); - jl_serialize_value(s, (jl_value_t*)m->source); - jl_serialize_value(s, (jl_value_t*)m->unspecialized); - jl_serialize_value(s, 
(jl_value_t*)m->generator); - jl_serialize_value(s, (jl_value_t*)m->invokes); - jl_serialize_value(s, (jl_value_t*)m->recursion_relation); - } - else if (jl_is_method_instance(v)) { - jl_method_instance_t *mi = (jl_method_instance_t*)v; - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) { - jl_error("unimplemented: serialization of MethodInstances for OpaqueClosure"); - } - write_uint8(s->s, TAG_METHOD_INSTANCE); - int internal = 0; - if (!jl_is_method(mi->def.method)) - internal = 1; - else if (module_in_worklist(mi->def.method->module)) - internal = 2; - write_uint8(s->s, internal); - if (!internal) { - // also flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - if (internal == 1) - jl_serialize_value(s, (jl_value_t*)mi->uninferred); - jl_serialize_value(s, (jl_value_t*)mi->specTypes); - jl_serialize_value(s, mi->def.value); - if (!internal) - return; - jl_serialize_value(s, (jl_value_t*)mi->sparam_vals); - jl_array_t *backedges = mi->backedges; - if (backedges) { - // filter backedges to only contain pointers - // to items that we will actually store (internal >= 2) - size_t ins = 0, i = 0, l = jl_array_len(backedges); - jl_value_t **b_edges = (jl_value_t**)jl_array_data(backedges); - jl_value_t *invokeTypes; - jl_method_instance_t *backedge; - while (i < l) { - i = get_next_edge(backedges, i, &invokeTypes, &backedge); - if (module_in_worklist(backedge->def.method->module) || method_instance_in_queue(backedge)) { - if (invokeTypes) - b_edges[ins++] = invokeTypes; - b_edges[ins++] = (jl_value_t*)backedge; - } - } - if (ins != l) - jl_array_del_end(backedges, l - ins); - if (ins == 0) - backedges = NULL; - } - jl_serialize_value(s, (jl_value_t*)backedges); - jl_serialize_value(s, (jl_value_t*)NULL); //callbacks - jl_serialize_code_instance(s, mi->cache, 1, 0); - } - else if (jl_is_code_instance(v)) { - jl_serialize_code_instance(s, 
(jl_code_instance_t*)v, 0, 1); - } - else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); - } - else if (jl_typeis(v, jl_task_type)) { - jl_error("Task cannot be serialized"); - } - else if (jl_typeis(v, jl_opaque_closure_type)) { - jl_error("Live opaque closures cannot be serialized"); - } - else if (jl_typeis(v, jl_string_type)) { - write_uint8(s->s, TAG_STRING); - write_int32(s->s, jl_string_len(v)); - ios_write(s->s, jl_string_data(v), jl_string_len(v)); - } - else if (jl_typeis(v, jl_int64_type)) { - void *data = jl_data_ptr(v); - if (*(int64_t*)data >= INT16_MIN && *(int64_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORTER_INT64); - write_uint16(s->s, (uint16_t)*(int64_t*)data); - } - else if (*(int64_t*)data >= S32_MIN && *(int64_t*)data <= S32_MAX) { - write_uint8(s->s, TAG_SHORT_INT64); - write_int32(s->s, (int32_t)*(int64_t*)data); - } - else { - write_uint8(s->s, TAG_INT64); - write_uint64(s->s, *(int64_t*)data); - } - } - else if (jl_typeis(v, jl_int32_type)) { - void *data = jl_data_ptr(v); - if (*(int32_t*)data >= INT16_MIN && *(int32_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORT_INT32); - write_uint16(s->s, (uint16_t)*(int32_t*)data); - } - else { - write_uint8(s->s, TAG_INT32); - write_int32(s->s, *(int32_t*)data); - } - } - else if (jl_typeis(v, jl_uint8_type)) { - write_uint8(s->s, TAG_UINT8); - write_int8(s->s, *(int8_t*)jl_data_ptr(v)); - } - else if (jl_is_cpointer(v) && jl_unbox_voidpointer(v) == NULL) { - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, jl_typeof(v)); - return; - } - else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, jl_datatype_size(jl_bigint_type)); - jl_serialize_value(s, jl_bigint_type); - jl_value_t *sizefield = jl_get_nth_field(v, 1); - jl_serialize_value(s, sizefield); - void *data = jl_unbox_voidpointer(jl_get_nth_field(v, 2)); - int32_t sz = jl_unbox_int32(sizefield); - size_t nb = (sz == 0 ? 
1 : (sz < 0 ? -sz : sz)) * gmp_limb_size; - ios_write(s->s, (char*)data, nb); - } - else { - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - if (v == t->instance) { - if (!type_in_worklist(t)) { - // also flag this in the backref table as special - // if it might not be unique (is external) - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - write_uint8(s->s, TAG_SINGLETON); - jl_serialize_value(s, t); - return; - } - assert(!t->instance && "detected singleton construction corruption"); - - if (t == jl_typename_type) { - void *bttag = ptrhash_get(&ser_tag, ((jl_typename_t*)t)->wrapper); - if (bttag != HT_NOTFOUND) { - write_uint8(s->s, TAG_BITYPENAME); - write_uint8(s->s, (uint8_t)(intptr_t)bttag); - return; - } - } - size_t tsz = jl_datatype_size(t); - if (tsz <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, tsz); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, tsz); - } - jl_serialize_value(s, t); - if (t == jl_typename_type) { - jl_typename_t *tn = (jl_typename_t*)v; - int internal = module_in_worklist(tn->module); - write_uint8(s->s, internal); - jl_serialize_value(s, tn->module); - jl_serialize_value(s, tn->name); - if (internal) { - jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->mt); - ios_write(s->s, (char*)&tn->hash, sizeof(tn->hash)); - write_uint8(s->s, tn->abstract | (tn->mutabl << 1) | (tn->mayinlinealloc << 2)); - write_uint8(s->s, tn->max_methods); - if (!tn->abstract) - write_uint16(s->s, tn->n_uninitialized); - size_t nb = tn->atomicfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->atomicfields, nb); - nb = tn->constfields ? 
(jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->constfields, nb); - } - return; - } - - if (jl_is_foreign_type(t)) { - jl_error("Cannot serialize instances of foreign datatypes"); - } - - char *data = (char*)jl_data_ptr(v); - size_t i, j, np = t->layout->npointers; - uint32_t nf = t->layout->nfields; - char *last = data; - for (i = 0, j = 0; i < nf+1; i++) { - char *ptr = data + (i < nf ? jl_field_offset(t, i) : jl_datatype_size(t)); - if (j < np) { - char *prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - while (ptr > prevptr) { - // previous field contained pointers; write them and their interleaved data - if (prevptr > last) - ios_write(s->s, last, prevptr - last); - jl_value_t *e = *(jl_value_t**)prevptr; - JL_GC_PROMISE_ROOTED(e); - if (t->name->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) && - jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - last = prevptr + sizeof(jl_value_t*); - j++; - if (j < np) - prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - else - break; - } - } - if (i == nf) - break; - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) { - if (ptr > last) - ios_write(s->s, last, ptr - last); - char *n = NULL; - ios_write(s->s, (char*)&n, sizeof(n)); - last = ptr + sizeof(n); - } - } - char *ptr = data + jl_datatype_size(t); - if (ptr > last) - ios_write(s->s, last, ptr - last); - } -} - - -// Create the forward-edge map (caller => callees) -// the intent of these functions is to invert the backedges tree -// for anything that points to a method not part of the worklist -// -// from MethodTables -static void jl_collect_missing_backedges(jl_methtable_t *mt) -{ - jl_array_t *backedges = mt->backedges; - if (backedges) { - 
size_t i, l = jl_array_len(backedges); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, NULL); - jl_array_ptr_1d_push(edges, missing_callee); - } - } -} - - -// from MethodInstances -static void collect_backedges(jl_method_instance_t *callee, int internal) JL_GC_DISABLED -{ - jl_array_t *backedges = callee->backedges; - if (backedges) { - size_t i = 0, l = jl_array_len(backedges); - while (i < l) { - jl_value_t *invokeTypes; - jl_method_instance_t *caller; - i = get_next_edge(backedges, i, &invokeTypes, &caller); - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, invokeTypes); - jl_array_ptr_1d_push(edges, (jl_value_t*)callee); - } - } -} - - -// For functions owned by modules not on the worklist, call this on each method. -// - if the method is owned by a worklist module, add it to the list of things to be -// fully serialized -// - Collect all backedges (may be needed later when we invert this list). 
-static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) JL_GC_DISABLED -{ - jl_array_t *s = (jl_array_t*)closure; - jl_method_t *m = ml->func.method; - if (s && module_in_worklist(m->module)) { - jl_array_ptr_1d_push(s, (jl_value_t*)m); - jl_array_ptr_1d_push(s, (jl_value_t*)ml->simplesig); - } - jl_svec_t *specializations = m->specializations; - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); - if ((jl_value_t*)callee != jl_nothing) - collect_backedges(callee, !s); - } - return 1; -} - -static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) JL_GC_DISABLED -{ - jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s); -} - -// Collect methods of external functions defined by modules in the worklist -// "extext" = "extending external" -// Also collect relevant backedges -static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED -{ - if (s && module_in_worklist(m)) - s = NULL; // do not collect any methods - size_t i; - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i += 2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - if (b->owner == m && b->value && b->constp) { - jl_value_t *bv = jl_unwrap_unionall(b->value); - if (jl_is_datatype(bv)) { - jl_typename_t *tn = ((jl_datatype_t*)bv)->name; - if (tn->module == m && tn->name == b->name && tn->wrapper == b->value) { - jl_methtable_t *mt = tn->mt; - if (mt != NULL && - (jl_value_t*)mt != jl_nothing && - (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) { - assert(mt->module == tn->module); - jl_collect_methtable_from_mod(s, mt); - if (s) - jl_collect_missing_backedges(mt); - } - } - } - else if (jl_is_module(b->value)) { - jl_module_t *child = (jl_module_t*)b->value; - if (child != m && child->parent == m && child->name == b->name) { - // this is the 
original/primary binding for the submodule - jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value); - } - } - else if (jl_is_mtable(b->value)) { - jl_methtable_t *mt = (jl_methtable_t*)b->value; - if (mt->module == m && mt->name == b->name) { - // this is probably an external method table, so let's assume so - // as there is no way to precisely distinguish them, - // and the rest of this serializer does not bother - // to handle any method tables specially - jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv); - } - } - } - } - } -} - -static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) JL_GC_DISABLED -{ - jl_array_t *callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); - if (callees != NULL) { - jl_array_ptr_1d_push(edges, (jl_value_t*)caller); - jl_array_ptr_1d_push(edges, (jl_value_t*)callees); - size_t i, l = jl_array_len(callees); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); - if (c && jl_is_method_instance(c)) { - arraylist_push(wq, c); - } - } - } -} - - -// Extract `edges` and `ext_targets` from `edges_map` -// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges -// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target -static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - arraylist_t wq; - arraylist_new(&wq, 0); - void **table = (void**)jl_array_data(edges_map); // edges is caller => callees - size_t table_size = jl_array_len(edges_map); - for (size_t i = 0; i < table_size; i += 2) { - assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && - "edges_map changed during iteration"); - jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; - jl_array_t *callees = (jl_array_t*)table[i + 1]; - if (callees == NULL) - continue; - 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - if (module_in_worklist(caller->def.method->module) || - method_instance_in_queue(caller)) { - jl_record_edges(caller, &wq, edges); - } - } - while (wq.len) { - jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); - jl_record_edges(caller, &wq, edges); - } - arraylist_free(&wq); - edges_map = NULL; - htable_t edges_map2; - htable_new(&edges_map2, 0); - htable_t edges_ids; - size_t l = jl_array_len(edges); - htable_new(&edges_ids, l); - for (size_t i = 0; i < l / 2; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); - void *target = (void*)((char*)HT_NOTFOUND + i + 1); - ptrhash_put(&edges_ids, (void*)caller, target); - } - // process target list to turn it into a memoized validity table - // and compute the old methods list, ready for serialization - jl_value_t *matches = NULL; - jl_array_t *callee_ids = NULL; - JL_GC_PUSH2(&matches, &callee_ids); - for (size_t i = 0; i < l; i += 2) { - jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); - size_t l = jl_array_len(callees); - callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - idxs[0] = 0; - size_t nt = 0; - for (size_t j = 0; j < l; j += 2) { - jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - assert(callee && "unsupported edge"); - - if (jl_is_method_instance(callee)) { - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if (module_in_worklist(mt->module)) - continue; - } - - // (nullptr, c) => call - // (invokeTypes, c) => invoke - // (nullptr, invokeTypes) => missing call - // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) - void *target = ptrhash_get(&edges_map2, invokeTypes ? 
(void*)invokeTypes : (void*)callee); - if (target == HT_NOTFOUND) { - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokeTypes) { - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); - if (matches == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - } - } - else { - jl_value_t *sig; - if (jl_is_method_instance(callee)) - sig = ((jl_method_instance_t*)callee)->specTypes; - else - sig = callee; - int ambig = 0; - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - -1, 0, world, &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - callee_ids = NULL; // invalid - break; - } - size_t k; - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_array_ptr_set(matches, k, match->method); - } - } - jl_array_ptr_1d_push(ext_targets, invokeTypes); - jl_array_ptr_1d_push(ext_targets, callee); - jl_array_ptr_1d_push(ext_targets, matches); - target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); - ptrhash_put(&edges_map2, (void*)callee, target); - } - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; - } - jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids - if (!callee_ids) - continue; - idxs[0] = nt; - // record place of every method in edges - // add method edges to the callee_ids list - for (size_t j = 0; j < l; j += 2) { - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - if (callee && jl_is_method_instance(callee)) { - void *target = ptrhash_get(&edges_ids, (void*)callee); - if (target != HT_NOTFOUND) { - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; - } - } - } - jl_array_del_end(callee_ids, l - nt); - } - 
JL_GC_POP(); - htable_free(&edges_map2); -} - -// serialize information about all loaded modules -static void write_mod_list(ios_t *s, jl_array_t *a) -{ - size_t i; - size_t len = jl_array_len(a); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); - assert(jl_is_module(m)); - if (!module_in_worklist(m)) { - const char *modname = jl_symbol_name(m->name); - size_t l = strlen(modname); - write_int32(s, l); - ios_write(s, modname, l); - write_uint64(s, m->uuid.hi); - write_uint64(s, m->uuid.lo); - write_uint64(s, m->build_id); - } - } - write_int32(s, 0); -} - -// "magic" string and version header of .ji file -static const int JI_FORMAT_VERSION = 11; -static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature -static const uint16_t BOM = 0xFEFF; // byte-order marker -static void write_header(ios_t *s) -{ - ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); - write_uint16(s, JI_FORMAT_VERSION); - ios_write(s, (char *) &BOM, 2); - write_uint8(s, sizeof(void*)); - ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1); - ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1); - ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1); - const char *branch = jl_git_branch(), *commit = jl_git_commit(); - ios_write(s, branch, strlen(branch)+1); - ios_write(s, commit, strlen(commit)+1); -} - -// serialize information about the result of deserializing this file -static void write_work_list(ios_t *s) -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - size_t l = strlen(jl_symbol_name(workmod->name)); - write_int32(s, l); - ios_write(s, jl_symbol_name(workmod->name), l); - write_uint64(s, workmod->uuid.hi); - write_uint64(s, workmod->uuid.lo); - write_uint64(s, workmod->build_id); - } - } - write_int32(s, 0); -} - -static void 
write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT -{ - if (depmod->parent == jl_main_module || depmod->parent == depmod) - return; - const char *mname = jl_symbol_name(depmod->name); - size_t slen = strlen(mname); - write_module_path(s, depmod->parent); - write_int32(s, slen); - ios_write(s, mname, slen); -} - -// Cache file header -// Serialize the global Base._require_dependencies array of pathnames that -// are include dependencies. Also write Preferences and return -// the location of the srctext "pointer" in the header index. -static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp) -{ - int64_t initial_pos = 0; - int64_t pos = 0; - static jl_array_t *deps = NULL; - if (!deps) - deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); - - // unique(deps) to eliminate duplicates while preserving order: - // we preserve order so that the topmost included .jl file comes first - static jl_value_t *unique_func = NULL; - if (!unique_func) - unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); - jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *udeps = (*udepsp = deps && unique_func ? 
(jl_array_t*)jl_apply(uniqargs, 2) : NULL); - ct->world_age = last_age; - - // write a placeholder for total size so that we can quickly seek past all of the - // dependencies if we don't need them - initial_pos = ios_pos(s); - write_uint64(s, 0); - if (udeps) { - size_t i, l = jl_array_len(udeps); - for (i = 0; i < l; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - size_t slen = jl_string_len(dep); - write_int32(s, slen); - ios_write(s, jl_string_data(dep), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime - jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module - jl_module_t *depmod_top = depmod; - while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) - depmod_top = depmod_top->parent; - unsigned provides = 0; - size_t j, lj = jl_array_len(serializer_worklist); - for (j = 0; j < lj; j++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, j); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - ++provides; - if (workmod == depmod_top) { - write_int32(s, provides); - write_module_path(s, depmod); - break; - } - } - } - write_int32(s, 0); - } - write_int32(s, 0); // terminator, for ease of reading - - // Calculate Preferences hash for current package. 
- jl_value_t *prefs_hash = NULL; - jl_value_t *prefs_list = NULL; - JL_GC_PUSH1(&prefs_list); - if (jl_base_module) { - // Toplevel module is the module we're currently compiling, use it to get our preferences hash - jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); - jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); - jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); - - if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { - // Temporary invoke in newest world age - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - - // call get_compiletime_prefs(__toplevel__) - jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; - prefs_list = (jl_value_t*)jl_apply(args, 2); - - // Call get_preferences_hash(__toplevel__, prefs_list) - args[0] = prefs_hash_func; - args[2] = prefs_list; - prefs_hash = (jl_value_t*)jl_apply(args, 3); - - // Reset world age to normal - ct->world_age = last_age; - } - } - - // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. - if (prefs_hash != NULL && prefs_list != NULL) { - size_t i, l = jl_array_len(prefs_list); - for (i = 0; i < l; i++) { - jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); - size_t slen = jl_string_len(pref_name); - write_int32(s, slen); - ios_write(s, jl_string_data(pref_name), slen); - } - write_int32(s, 0); // terminator - write_uint64(s, jl_unbox_uint64(prefs_hash)); - } else { - // This is an error path, but let's at least generate a valid `.ji` file. - // We declare an empty list of preference names, followed by a zero-hash. - // The zero-hash is not what would be generated for an empty set of preferences, - // and so this `.ji` file will be invalidated by a future non-erroring pass - // through this function. 
- write_int32(s, 0); - write_uint64(s, 0); - } - JL_GC_POP(); // for prefs_list - - // write a dummy file position to indicate the beginning of the source-text - pos = ios_pos(s); - ios_seek(s, initial_pos); - write_uint64(s, pos - initial_pos); - ios_seek(s, pos); - write_uint64(s, 0); - } - return pos; -} - -// --- deserialize --- - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED; - -static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_value_t **loc) JL_GC_DISABLED -{ - assert(pos == backref_list.len - 1 && "nothing should have been deserialized since assigning pos"); - int tag = read_uint8(s->s); - if (tag == 6 || tag == 7) { - jl_typename_t *name = (jl_typename_t*)jl_deserialize_value(s, NULL); - jl_value_t *dtv = name->wrapper; - jl_svec_t *parameters = (jl_svec_t*)jl_deserialize_value(s, NULL); - dtv = jl_apply_type(dtv, jl_svec_data(parameters), jl_svec_len(parameters)); - backref_list.items[pos] = dtv; - return dtv; - } - if (!(tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)) { - assert(0 && "corrupt deserialization state"); - abort(); - } - jl_datatype_t *dt = jl_new_uninitialized_datatype(); - backref_list.items[pos] = dt; - if (loc != NULL && loc != HT_NOTFOUND) - *loc = (jl_value_t*)dt; - uint8_t flags = read_uint8(s->s); - uint8_t memflags = read_uint8(s->s); - int has_layout = flags & 1; - int has_instance = (flags >> 1) & 1; - dt->hasfreetypevars = memflags & 1; - dt->isconcretetype = (memflags >> 1) & 1; - dt->isdispatchtuple = (memflags >> 2) & 1; - dt->isbitstype = (memflags >> 3) & 1; - dt->zeroinit = (memflags >> 4) & 1; - dt->has_concrete_subtype = (memflags >> 5) & 1; - dt->cached_by_hash = (memflags >> 6) & 1; - dt->isprimitivetype = (memflags >> 7) & 1; - dt->hash = read_int32(s->s); - - if (has_layout) { - uint8_t layout = read_uint8(s->s); - if (layout == 1) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout; - } 
- else if (layout == 2) { - dt->layout = jl_nothing_type->layout; - } - else if (layout == 3) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout; - } - else { - assert(layout == 0); - jl_datatype_layout_t buffer; - ios_readall(s->s, (char*)&buffer, sizeof(buffer)); - uint32_t nf = buffer.nfields; - uint32_t np = buffer.npointers; - uint8_t fielddesc_type = buffer.fielddesc_type; - size_t fielddesc_size = nf > 0 ? jl_fielddesc_size(fielddesc_type) : 0; - size_t fldsize = nf * fielddesc_size; - if (buffer.first_ptr != -1) - fldsize += np << fielddesc_type; - jl_datatype_layout_t *layout = (jl_datatype_layout_t*)jl_gc_perm_alloc( - sizeof(jl_datatype_layout_t) + fldsize, - 0, 4, 0); - *layout = buffer; - ios_readall(s->s, (char*)(layout + 1), fldsize); - dt->layout = layout; - } - } - - if (tag == 10 || tag == 11 || tag == 12) { - assert(pos > 0); - arraylist_push(&flagref_list, loc == HT_NOTFOUND ? NULL : loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)pos); - ptrhash_put(&uniquing_table, dt, NULL); - } - - if (has_instance) { - assert(dt->isconcretetype && "there shouldn't be an instance on an abstract type"); - dt->instance = jl_deserialize_value(s, &dt->instance); - jl_gc_wb(dt, dt->instance); - } - dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name); - jl_gc_wb(dt, dt->name); - dt->parameters = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters); - jl_gc_wb(dt, dt->parameters); - dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super); - jl_gc_wb(dt, dt->super); - dt->types = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->types); - if (dt->types) jl_gc_wb(dt, dt->types); - - return (jl_value_t*)dt; -} - -static jl_value_t *jl_deserialize_value_svec(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - size_t i, len; - if (tag == TAG_SVEC) - len = read_uint8(s->s); - else - len = read_int32(s->s); - jl_svec_t *sv = 
jl_alloc_svec(len); - if (loc != NULL) - *loc = (jl_value_t*)sv; - arraylist_push(&backref_list, (jl_value_t*)sv); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - } - return (jl_value_t*)sv; -} - -static jl_value_t *jl_deserialize_value_symbol(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - size_t len; - if (tag == TAG_SYMBOL) - len = read_uint8(s->s); - else - len = read_int32(s->s); - char *name = (char*)(len >= 256 ? malloc_s(len + 1) : alloca(len + 1)); - ios_readall(s->s, name, len); - name[len] = '\0'; - jl_value_t *sym = (jl_value_t*)jl_symbol(name); - if (len >= 256) - free(name); - arraylist_push(&backref_list, sym); - return sym; -} - -static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - int16_t i, ndims; - int isptr, isunion, hasptr, elsize; - if (tag == TAG_ARRAY1D) { - ndims = 1; - elsize = read_uint8(s->s); - isptr = (elsize >> 7) & 1; - hasptr = (elsize >> 6) & 1; - isunion = (elsize >> 5) & 1; - elsize = elsize & 0x1f; - } - else { - ndims = read_uint16(s->s); - elsize = read_uint16(s->s); - isptr = (elsize >> 15) & 1; - hasptr = (elsize >> 14) & 1; - isunion = (elsize >> 13) & 1; - elsize = elsize & 0x1fff; - } - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_deserialize_value(s, NULL)); - } - jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); - backref_list.items[pos] = a; - jl_value_t *aty = jl_deserialize_value(s, &jl_astaggedvalue(a)->type); - jl_set_typeof(a, aty); - if (a->flags.ptrarray) { - jl_value_t **data = (jl_value_t**)jl_array_data(a); - size_t i, numel = jl_array_len(a); - for (i = 0; i < numel; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - //if (data[i]) // not needed because `a` is new 
(gc is disabled) - // jl_gc_wb(a, data[i]); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else if (a->flags.hasptr) { - size_t i, numel = jl_array_len(a); - char *data = (char*)jl_array_data(a); - uint16_t elsz = a->elsize; - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - size_t j, np = et->layout->npointers; - for (i = 0; i < numel; i++) { - char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld) // not needed because `a` is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_readall(s->s, start, data - start); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else { - size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; - size_t tot = jl_array_len(a) * a->elsize + extra; - ios_readall(s->s, (char*)jl_array_data(a), tot); - } - return (jl_value_t*)a; -} - -static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_t *m = - (jl_method_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_t), - jl_method_type); - memset(m, 0, sizeof(jl_method_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, m); - m->sig = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&m->sig); - jl_gc_wb(m, m->sig); - m->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->module); - jl_gc_wb(m, m->module); - int serialization_mode = read_uint8(s->s); - if (serialization_mode & METHOD_EXTERNAL_MT) { - jl_module_t *mt_mod = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *mt_name = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->external_mt = jl_get_global(mt_mod, mt_name); - jl_gc_wb(m, m->external_mt); - assert(jl_typeis(m->external_mt, 
jl_methtable_type)); - } - else { - m->external_mt = jl_deserialize_value(s, &m->external_mt); - jl_gc_wb(m, m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - uint64_t key = read_uint64(s->s); - int i, nnew = read_int32(s->s); - jl_array_t *newroots = jl_alloc_vec_any(nnew); - jl_value_t **data = (jl_value_t**)jl_array_data(newroots); - for (i = 0; i < nnew; i++) - data[i] = jl_deserialize_value(s, &(data[i])); - // Storing the new roots in `m->roots` risks losing them due to recaching - // (which replaces pointers to `m` with ones to the "live" method). - // Put them in separate storage so we can find them later. - assert(ptrhash_get(&queued_method_roots, m) == HT_NOTFOUND); - // In storing the key, on 32-bit platforms we need two slots. Might as well do this for all platforms. - jl_svec_t *qmrval = jl_alloc_svec_uninit(3); // GC is disabled - jl_svec_data(qmrval)[0] = (jl_value_t*)(uintptr_t)(key & ((((uint64_t)1) << 32) - 1)); // lo bits - jl_svec_data(qmrval)[1] = (jl_value_t*)(uintptr_t)((key >> 32) & ((((uint64_t)1) << 32) - 1)); // hi bits - jl_svec_data(qmrval)[2] = (jl_value_t*)newroots; - ptrhash_put(&queued_method_roots, m, qmrval); - } - return (jl_value_t*)m; - } - m->specializations = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->specializations); - jl_gc_wb(m, m->specializations); - jl_array_t *speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset); - jl_atomic_store_relaxed(&m->speckeyset, speckeyset); - jl_gc_wb(m, speckeyset); - m->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(m, m->name); - m->file = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->line = read_int32(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - m->deleted_world = ~(size_t)0; - m->called = read_int32(s->s); - 
m->nargs = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->nkw = read_int32(s->s); - m->isva = read_int8(s->s); - m->pure = read_int8(s->s); - m->is_for_opaque_closure = read_int8(s->s); - m->constprop = read_int8(s->s); - m->purity.bits = read_uint8(s->s); - m->slot_syms = jl_deserialize_value(s, (jl_value_t**)&m->slot_syms); - jl_gc_wb(m, m->slot_syms); - m->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->roots); - if (m->roots) - jl_gc_wb(m, m->roots); - m->root_blocks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->root_blocks); - if (m->root_blocks) - jl_gc_wb(m, m->root_blocks); - m->nroots_sysimg = read_int32(s->s); - m->ccallable = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->ccallable); - if (m->ccallable) { - jl_gc_wb(m, m->ccallable); - arraylist_push(&ccallable_list, m->ccallable); - } - m->source = jl_deserialize_value(s, &m->source); - if (m->source) - jl_gc_wb(m, m->source); - m->unspecialized = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&m->unspecialized); - if (m->unspecialized) - jl_gc_wb(m, m->unspecialized); - m->generator = jl_deserialize_value(s, (jl_value_t**)&m->generator); - if (m->generator) - jl_gc_wb(m, m->generator); - m->invokes = jl_deserialize_value(s, (jl_value_t**)&m->invokes); - jl_gc_wb(m, m->invokes); - m->recursion_relation = jl_deserialize_value(s, (jl_value_t**)&m->recursion_relation); - if (m->recursion_relation) - jl_gc_wb(m, m->recursion_relation); - JL_MUTEX_INIT(&m->writelock); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_instance_t), - jl_method_instance_type); - memset(mi, 0, sizeof(jl_method_instance_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, mi); - int internal = read_uint8(s->s); - if (internal == 1) { - mi->uninferred = 
jl_deserialize_value(s, &mi->uninferred); - jl_gc_wb(mi, mi->uninferred); - } - mi->specTypes = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&mi->specTypes); - jl_gc_wb(mi, mi->specTypes); - mi->def.value = jl_deserialize_value(s, &mi->def.value); - jl_gc_wb(mi, mi->def.value); - - if (!internal) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - return (jl_value_t*)mi; - } - - mi->sparam_vals = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&mi->sparam_vals); - jl_gc_wb(mi, mi->sparam_vals); - mi->backedges = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->backedges); - if (mi->backedges) - jl_gc_wb(mi, mi->backedges); - mi->callbacks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->callbacks); - if (mi->callbacks) - jl_gc_wb(mi, mi->callbacks); - mi->cache = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&mi->cache); - if (mi->cache) - jl_gc_wb(mi, mi->cache); - return (jl_value_t*)mi; -} - -static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_code_instance_t *codeinst = - (jl_code_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); - memset(codeinst, 0, sizeof(jl_code_instance_t)); - arraylist_push(&backref_list, codeinst); - int flags = read_uint8(s->s); - int validate = (flags >> 0) & 3; - int constret = (flags >> 2) & 1; - codeinst->ipo_purity_bits = read_uint32(s->s); - jl_atomic_store_relaxed(&codeinst->purity_bits, read_uint32(s->s)); - codeinst->def = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->def); - jl_gc_wb(codeinst, codeinst->def); - jl_value_t *inferred = jl_deserialize_value(s, NULL); - jl_atomic_store_release(&codeinst->inferred, inferred); - jl_gc_wb(codeinst, inferred); - codeinst->rettype_const = jl_deserialize_value(s, &codeinst->rettype_const); - if (codeinst->rettype_const) - jl_gc_wb(codeinst, 
codeinst->rettype_const); - codeinst->rettype = jl_deserialize_value(s, &codeinst->rettype); - jl_gc_wb(codeinst, codeinst->rettype); - codeinst->argescapes = jl_deserialize_value(s, &codeinst->argescapes); - jl_gc_wb(codeinst, codeinst->argescapes); - if (constret) - codeinst->invoke = jl_fptr_const_return; - if ((flags >> 3) & 1) - codeinst->precompile = 1; - codeinst->relocatability = read_uint8(s->s); - assert(codeinst->relocatability <= 1); - codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next); - jl_gc_wb(codeinst, codeinst->next); - if (validate) { - codeinst->min_world = jl_atomic_load_acquire(&jl_world_counter); - ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND)); // "HT_FOUND" - } - return (jl_value_t*)codeinst; -} - -static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DISABLED -{ - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - jl_sym_t *mname = (jl_sym_t*)jl_deserialize_value(s, NULL); - int ref_only = read_uint8(s->s); - if (ref_only) { - jl_value_t *m_ref; - if (ref_only == 1) - m_ref = jl_get_global((jl_module_t*)jl_deserialize_value(s, NULL), mname); - else - m_ref = jl_array_ptr_ref(s->loaded_modules_array, read_int32(s->s)); - backref_list.items[pos] = m_ref; - return m_ref; - } - jl_module_t *m = jl_new_module(mname); - backref_list.items[pos] = m; - m->parent = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->parent); - jl_gc_wb(m, m->parent); - - while (1) { - jl_sym_t *asname = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (asname == NULL) - break; - jl_binding_t *b = jl_get_binding_wr(m, asname, 1); - b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name); - jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value); - *(jl_value_t**)&b->value = bvalue; - if (bvalue != NULL) jl_gc_wb(m, bvalue); - jl_value_t *bglobalref = jl_deserialize_value(s, (jl_value_t**)&b->globalref); - 
*(jl_value_t**)&b->globalref = bglobalref; - if (bglobalref != NULL) jl_gc_wb(m, bglobalref); - b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner); - if (b->owner != NULL) jl_gc_wb(m, b->owner); - jl_value_t *bty = jl_deserialize_value(s, (jl_value_t**)&b->ty); - *(jl_value_t**)&b->ty = bty; - int8_t flags = read_int8(s->s); - b->deprecated = (flags>>3) & 1; - b->constp = (flags>>2) & 1; - b->exportp = (flags>>1) & 1; - b->imported = (flags) & 1; - } - size_t i = m->usings.len; - size_t ni = read_int32(s->s); - arraylist_grow(&m->usings, ni); - ni += i; - while (i < ni) { - m->usings.items[i] = jl_deserialize_value(s, (jl_value_t**)&m->usings.items[i]); - i++; - } - m->istopmod = read_uint8(s->s); - m->uuid.hi = read_uint64(s->s); - m->uuid.lo = read_uint64(s->s); - m->build_id = read_uint64(s->s); - m->counter = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->optlevel = read_int8(s->s); - m->compile = read_int8(s->s); - m->infer = read_int8(s->s); - m->max_methods = read_int8(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_singleton(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_value_t *v = (jl_value_t*)jl_gc_alloc(s->ptls, 0, NULL); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, (void*)v); - // TODO: optimize the case where the value can easily be obtained - // from an external module (tag == 6) as dt->instance - assert(loc != HT_NOTFOUND); - // if loc == NULL, then the caller can't provide the address where the instance will be - // stored. this happens if a field might store a 0-size value, but the field itself is - // not 0 size, e.g. 
`::Union{Int,Nothing}` - if (loc != NULL) { - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - } - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)HT_NOTFOUND); // no loc, since if dt is replaced, then dt->instance would be also - jl_set_typeof(v, dt); - if (dt->instance == NULL) - return v; - return dt->instance; -} - -static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - char *data = (char*)jl_data_ptr(v); - size_t i, np = dt->layout->npointers; - char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld)// a is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += jl_datatype_size(dt); - if (data != start) - ios_readall(s->s, start, data - start); - if (dt == jl_typemap_entry_type) { - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)v; - if (entry->max_world == ~(size_t)0) { - if (entry->min_world > 1) { - // update world validity to reflect current state of the counter - entry->min_world = jl_atomic_load_acquire(&jl_world_counter); - } - } - else { - // garbage entry - delete it :( - entry->min_world = 1; - entry->max_world = 0; - } - } else if (dt == jl_globalref_type) { - jl_globalref_t *r = (jl_globalref_t*)v; - jl_binding_t *b = jl_get_binding_if_bound(r->mod, r->name); - r->bnd_cache = b && b->value ? b : NULL; - } -} - -static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - int32_t sz = (tag == TAG_SHORT_GENERAL ? 
read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, &jl_astaggedvalue(v)->type); - assert(sz != 0 || loc); - if (dt == jl_typename_type) { - int internal = read_uint8(s->s); - jl_typename_t *tn; - if (internal) { - tn = (jl_typename_t*)jl_gc_alloc( - s->ptls, sizeof(jl_typename_t), jl_typename_type); - memset(tn, 0, sizeof(jl_typename_t)); - tn->cache = jl_emptysvec; // the cache is refilled later (tag 5) - tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5) - backref_list.items[pos] = tn; - } - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (internal) { - tn->module = m; - tn->name = sym; - tn->names = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&tn->names); - jl_gc_wb(tn, tn->names); - tn->wrapper = jl_deserialize_value(s, &tn->wrapper); - jl_gc_wb(tn, tn->wrapper); - tn->Typeofwrapper = NULL; - tn->mt = (jl_methtable_t*)jl_deserialize_value(s, (jl_value_t**)&tn->mt); - jl_gc_wb(tn, tn->mt); - ios_read(s->s, (char*)&tn->hash, sizeof(tn->hash)); - int8_t flags = read_int8(s->s); - tn->_reserved = 0; - tn->abstract = flags & 1; - tn->mutabl = (flags>>1) & 1; - tn->mayinlinealloc = (flags>>2) & 1; - tn->max_methods = read_uint8(s->s); - if (tn->abstract) - tn->n_uninitialized = 0; - else - tn->n_uninitialized = read_uint16(s->s); - size_t nfields = read_int32(s->s); - if (nfields) { - tn->atomicfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->atomicfields, nfields); - } - nfields = read_int32(s->s); - if (nfields) { - tn->constfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->constfields, nfields); - } - } - else { - jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(jl_get_global(m, sym)); - assert(jl_is_datatype(dt)); 
- tn = dt->name; - backref_list.items[pos] = tn; - } - return (jl_value_t*)tn; - } - jl_set_typeof(v, dt); - if ((jl_value_t*)dt == jl_bigint_type) { - jl_value_t *sizefield = jl_deserialize_value(s, NULL); - int32_t sz = jl_unbox_int32(sizefield); - int32_t nw = (sz == 0 ? 1 : (sz < 0 ? -sz : sz)); - size_t nb = nw * gmp_limb_size; - void *buf = jl_gc_counted_malloc(nb); - if (buf == NULL) - jl_throw(jl_memory_exception); - ios_readall(s->s, (char*)buf, nb); - jl_set_nth_field(v, 0, jl_box_int32(nw)); - jl_set_nth_field(v, 1, sizefield); - jl_set_nth_field(v, 2, jl_box_voidpointer(buf)); - } - else { - jl_deserialize_struct(s, v); - } - return v; -} - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - assert(!ios_eof(s->s)); - jl_value_t *v; - size_t n; - uintptr_t pos; - uint8_t tag = read_uint8(s->s); - if (tag > LAST_TAG) - return deser_tag[tag]; - switch (tag) { - case TAG_NULL: return NULL; - case 0: - tag = read_uint8(s->s); - return deser_tag[tag]; - case TAG_BACKREF: JL_FALLTHROUGH; case TAG_SHORT_BACKREF: ; - uintptr_t offs = (tag == TAG_BACKREF) ? 
read_int32(s->s) : read_uint16(s->s); - int isflagref = 0; - isflagref = !!(offs & 1); - offs >>= 1; - // assert(offs >= 0); // offs is unsigned so this is always true - assert(offs < backref_list.len); - jl_value_t *bp = (jl_value_t*)backref_list.items[offs]; - assert(bp); - if (isflagref && loc != HT_NOTFOUND) { - if (loc != NULL) { - // as in jl_deserialize_value_singleton, the caller won't have a place to - // store this reference given a field type like Union{Int,Nothing} - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)-1); - } - } - return (jl_value_t*)bp; - case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC: - return jl_deserialize_value_svec(s, tag, loc); - case TAG_COMMONSYM: - return deser_symbols[read_uint8(s->s)]; - case TAG_SYMBOL: JL_FALLTHROUGH; case TAG_LONG_SYMBOL: - return jl_deserialize_value_symbol(s, tag); - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_deserialize_value_array(s, tag); - case TAG_UNIONALL: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - if (read_uint8(s->s)) { - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_value_t *v = jl_get_global(m, sym); - assert(jl_is_unionall(v)); - backref_list.items[pos] = v; - return v; - } - v = jl_gc_alloc(s->ptls, sizeof(jl_unionall_t), jl_unionall_type); - backref_list.items[pos] = v; - ((jl_unionall_t*)v)->var = (jl_tvar_t*)jl_deserialize_value(s, (jl_value_t**)&((jl_unionall_t*)v)->var); - jl_gc_wb(v, ((jl_unionall_t*)v)->var); - ((jl_unionall_t*)v)->body = jl_deserialize_value(s, &((jl_unionall_t*)v)->body); - jl_gc_wb(v, ((jl_unionall_t*)v)->body); - return v; - case TAG_TVAR: - v = jl_gc_alloc(s->ptls, sizeof(jl_tvar_t), jl_tvar_type); - jl_tvar_t *tv = (jl_tvar_t*)v; - arraylist_push(&backref_list, tv); - tv->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(tv, tv->name); - tv->lb = jl_deserialize_value(s, &tv->lb); - 
jl_gc_wb(tv, tv->lb); - tv->ub = jl_deserialize_value(s, &tv->ub); - jl_gc_wb(tv, tv->ub); - return (jl_value_t*)tv; - case TAG_METHOD: - return jl_deserialize_value_method(s, loc); - case TAG_METHOD_INSTANCE: - return jl_deserialize_value_method_instance(s, loc); - case TAG_CODE_INSTANCE: - return jl_deserialize_value_code_instance(s, loc); - case TAG_MODULE: - return jl_deserialize_value_module(s); - case TAG_SHORTER_INT64: - v = jl_box_int64((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT64: - v = jl_box_int64(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT64: - v = jl_box_int64((int64_t)read_uint64(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT32: - v = jl_box_int32((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT32: - v = jl_box_int32(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_UINT8: - return jl_box_uint8(read_uint8(s->s)); - case TAG_SINGLETON: - return jl_deserialize_value_singleton(s, loc); - case TAG_CORE: - return (jl_value_t*)jl_core_module; - case TAG_BASE: - return (jl_value_t*)jl_base_module; - case TAG_CNULL: - v = jl_gc_alloc(s->ptls, sizeof(void*), NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - *(void**)v = NULL; - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_set_typeof(v, jl_deserialize_value(s, &jl_astaggedvalue(v)->type)); - return v; - case TAG_BITYPENAME: - v = deser_tag[read_uint8(s->s)]; - return (jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(v))->name; - case TAG_STRING: - n = read_int32(s->s); - v = jl_alloc_string(n); - arraylist_push(&backref_list, v); - ios_readall(s->s, jl_string_data(v), n); - return v; - case TAG_DATATYPE: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - return jl_deserialize_datatype(s, pos, loc); - default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return 
jl_deserialize_value_any(s, tag, loc); - } -} - -// Add methods to external (non-worklist-owned) functions -static void jl_insert_methods(jl_array_t *list) -{ - size_t i, l = jl_array_len(list); - for (i = 0; i < l; i += 2) { - jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method(meth)); - assert(!meth->is_for_opaque_closure); - jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1); - jl_methtable_t *mt = jl_method_get_table(meth); - assert((jl_value_t*)mt != jl_nothing); - jl_method_table_insert(mt, meth, simpletype); - } -} - -int remove_code_instance_from_validation(jl_code_instance_t *codeinst) -{ - return ptrhash_remove(&new_code_instance_validate, codeinst); -} - -// verify that these edges intersect with the same methods as before -static jl_array_t *jl_verify_edges(jl_array_t *targets) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - size_t i, l = jl_array_len(targets) / 3; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); - memset(jl_array_data(valids), 1, l); - jl_value_t *loctag = NULL; - jl_value_t *matches = NULL; - JL_GC_PUSH3(&valids, &matches, &loctag); - for (i = 0; i < l; i++) { - jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); - jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); - jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); - int valid = 1; - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokesig) { - assert(callee && "unsupported edge"); - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - valid = 0; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid); - if (matches == jl_nothing) { - valid = 0; - } - else { - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - if (matches != expected) { - valid = 0; - } - } - } - } - else { - jl_value_t *sig; - if 
(jl_is_method_instance(callee)) - sig = ((jl_method_instance_t*)callee)->specTypes; - else - sig = callee; - assert(jl_is_array(expected)); - int ambig = 0; - // TODO: possibly need to included ambiguities too (for the optimizer correctness)? - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - -1, 0, world, &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - valid = 0; - } - else { - // setdiff!(matches, expected) - size_t j, k, ins = 0; - if (jl_array_len(matches) != jl_array_len(expected)) { - valid = 0; - } - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; - size_t l = jl_array_len(expected); - for (j = 0; j < l; j++) - if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) - break; - if (j == l) { - // intersection has a new method or a method was - // deleted--this is now probably no good, just invalidate - // everything about it now - valid = 0; - if (!_jl_debug_method_invalidation) - break; - jl_array_ptr_set(matches, ins++, match); - } - } - if (!valid && _jl_debug_method_invalidation) - jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); - } - } - jl_array_uint8_set(valids, i, valid); - if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee); - loctag = jl_cstr_to_string("insert_backedges_callee"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)i); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); - //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); - } - JL_GC_POP(); - return valids; -} - -// Combine all edges relevant to a method into the visited table -void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited) -{ - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - size_t i, l = jl_array_len(edges) / 2; - htable_new(visited, l); - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int valid = 1; - if (callee_ids == NULL) { - // serializing the edges had failed - valid = 0; - } - else { - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t j; - for (j = 0; valid && j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - valid = jl_array_uint8_ref(valids, idx); - if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)idx); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - } - } - ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1)); - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); - //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); - // HT_NOTFOUND: valid (no invalid edges) - // HT_NOTFOUND + 1: invalid - // HT_NOTFOUND + 2: need to scan - // HT_NOTFOUND + 3 + depth: in-progress - } - JL_GC_POP(); -} - - -// Propagate the result of cycle-resolution to all edges (recursively) -static int mark_edges_in_worklist(jl_array_t *edges, int idx, jl_method_instance_t *cycle, htable_t *visited, int found) -{ - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); - int oldfound = (char*)ptrhash_get(visited, caller) - (char*)HT_NOTFOUND; - if (oldfound < 3) - return 0; // not in-progress - if (!found) { - ptrhash_remove(visited, (void*)caller); - } - else { - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); - } - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t i, badidx = 0, n = jl_array_len(callee_ids); - for (i = idxs[0] + 1; i < n; i++) { - if (mark_edges_in_worklist(edges, idxs[i], cycle, visited, found) && badidx == 0) - badidx = i - idxs[0]; - } - if (_jl_debug_method_invalidation) { - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_method_instance_t *callee = cycle; - if (badidx--) - callee = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * badidx); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); - JL_GC_POP(); - } - return 1; -} - - -// Visit the entire call graph, starting from edges[idx] to determine if that method is valid -static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, int depth) -{ - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); - 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND; - if (found == 0) - return 1; // valid - if (found == 1) - return 0; // invalid - if (found != 2) - return found - 1; // depth - found = 0; - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - int cycle = 0; - size_t i, n = jl_array_len(callee_ids); - for (i = idxs[0] + 1; i < n; i++) { - int32_t idx = idxs[i]; - int child_found = jl_verify_graph_edge(edges, idx, visited, depth + 1); - if (child_found == 0) { - found = 1; - if (_jl_debug_method_invalidation) { - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_array_ptr_ref(edges, idx * 2)); - JL_GC_POP(); - } - break; - } - else if (child_found >= 2 && child_found - 2 < cycle) { - // record the cycle will resolve at depth "cycle" - cycle = child_found - 2; - assert(cycle); - } - } - if (!found) { - if (cycle && cycle != depth) - return cycle + 2; - ptrhash_remove(visited, (void*)caller); - } - else { // found invalid - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); - } - if (cycle) { - // If we are the top of the current cycle, now mark all other parts of - // our cycle by re-walking the backedges graph and marking all WIP - // items as found. - // Be careful to only re-walk as far as we had originally scanned above. - // Or if we found a backedge, also mark all of the other parts of the - // cycle as also having an backedge. 
- n = i; - for (i = idxs[0] + 1; i < n; i++) { - mark_edges_in_worklist(edges, idxs[i], caller, visited, found); - } - } - return found ? 0 : 1; -} - -// Visit all entries in edges, verify if they are valid -static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited) -{ - size_t i, n = jl_array_len(edges) / 2; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n); - JL_GC_PUSH1(&valids); - int8_t *valids_data = (int8_t*)jl_array_data(valids); - for (i = 0; i < n; i++) { - valids_data[i] = jl_verify_graph_edge(edges, i, visited, 1); - } - JL_GC_POP(); - return valids; -} - -// Restore backedges to external targets -// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. -// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. -static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *mi_list) -{ - // determine which CodeInstance objects are still valid in our image - size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *valids = jl_verify_edges(ext_targets); - JL_GC_PUSH1(&valids); - htable_t visited; - htable_new(&visited, 0); - jl_verify_methods(edges, valids, &visited); - valids = jl_verify_graph(edges, &visited); - size_t i, l = jl_array_len(edges) / 2; - - // next build a map from external_mis to their CodeInstance for insertion - if (mi_list == NULL) { - htable_reset(&visited, 0); - } - else { - size_t i, l = jl_array_len(mi_list); - htable_reset(&visited, l); - for (i = 0; i < l; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(mi_list, i); - ptrhash_put(&visited, (void*)ci->def, (void*)ci); - } - } - - // next disable any invalid codes, so we do not try to enable them - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - assert(jl_is_method_instance(caller) && 
jl_is_method(caller->def.method)); - int valid = jl_array_uint8_ref(valids, i); - if (valid) - continue; - void *ci = ptrhash_get(&visited, (void*)caller); - if (ci != HT_NOTFOUND) { - assert(jl_is_code_instance(ci)); - remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled - } - else { - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - remove_code_instance_from_validation(codeinst); // should be left invalid - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - } - } - - // finally enable any applicable new codes - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - int valid = jl_array_uint8_ref(valids, i); - if (!valid) - continue; - // if this callee is still valid, add all the backedges - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - for (size_t j = 0; j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); - jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); - if (callee && jl_is_method_instance(callee)) { - jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); - } - else { - jl_value_t *sig = callee == NULL ? invokesig : callee; - jl_methtable_t *mt = jl_method_table_for(sig); - // FIXME: rarely, `callee` has an unexpected `Union` signature, - // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 - // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` - // This workaround exposes us to (rare) 265-violations. 
- if ((jl_value_t*)mt != jl_nothing) - jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); - } - } - // then enable it - void *ci = ptrhash_get(&visited, (void*)caller); - if (ci != HT_NOTFOUND) { - // have some new external code to use - assert(jl_is_code_instance(ci)); - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - remove_code_instance_from_validation(codeinst); // mark it as handled - assert(codeinst->min_world >= world && codeinst->inferred); - codeinst->max_world = ~(size_t)0; - if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { - jl_mi_cache_insert(caller, codeinst); - } - } - else { - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - if (remove_code_instance_from_validation(codeinst)) { // mark it as handled - assert(codeinst->min_world >= world && codeinst->inferred); - codeinst->max_world = ~(size_t)0; - } - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - } - } - - htable_free(&visited); - JL_GC_POP(); -} - -static void validate_new_code_instances(void) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - size_t i; - for (i = 0; i < new_code_instance_validate.size; i += 2) { - if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { - jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i]; - JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it) - assert(ci->min_world >= world && ci->inferred); - ci->max_world = ~(size_t)0; - jl_method_instance_t *caller = ci->def; - if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { - jl_mi_cache_insert(caller, ci); - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); - //ios_puts("FREE\n", ios_stderr); - } - } -} - -static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list) -{ - if (!jl_main_module->build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Main module uuid state is invalid for module deserialization."); - } - size_t 
i, l = jl_array_len(mod_list); - for (i = 0; ; i++) { - size_t len = read_int32(s); - if (len == 0 && i == l) - return NULL; // success - if (len == 0 || i == l) - return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); - char *name = (char*)alloca(len + 1); - ios_readall(s, name, len); - name[len] = '\0'; - jl_uuid_t uuid; - uuid.hi = read_uint64(s); - uuid.lo = read_uint64(s); - uint64_t build_id = read_uint64(s); - jl_sym_t *sym = _jl_symbol(name, len); - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_list, i); - if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym || m->build_id != build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Invalid input in module list: expected %s.", name); - } - } -} - -static int readstr_verify(ios_t *s, const char *str) -{ - size_t i, len = strlen(str); - for (i = 0; i < len; ++i) - if ((char)read_uint8(s) != str[i]) - return 0; - return 1; -} - -JL_DLLEXPORT int jl_read_verify_header(ios_t *s) -{ - uint16_t bom; - return (readstr_verify(s, JI_MAGIC) && - read_uint16(s) == JI_FORMAT_VERSION && - ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && - read_uint8(s) == sizeof(void*) && - readstr_verify(s, JL_BUILD_UNAME) && !read_uint8(s) && - readstr_verify(s, JL_BUILD_ARCH) && !read_uint8(s) && - readstr_verify(s, JULIA_VERSION_STRING) && !read_uint8(s) && - readstr_verify(s, jl_git_branch()) && !read_uint8(s) && - readstr_verify(s, jl_git_commit()) && !read_uint8(s)); -} - -static void jl_finalize_serializer(jl_serializer_state *s) -{ - size_t i, l; - // save module initialization order - if (jl_module_init_order != NULL) { - l = jl_array_len(jl_module_init_order); - for (i = 0; i < l; i++) { - // verify that all these modules were saved - assert(ptrhash_get(&backref_table, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); - } - } - jl_serialize_value(s, jl_module_init_order); - - // record list of reinitialization functions - 
l = reinit_list.len; - for (i = 0; i < l; i += 2) { - write_int32(s->s, (int)((uintptr_t) reinit_list.items[i])); - write_int32(s->s, (int)((uintptr_t) reinit_list.items[i+1])); - } - write_int32(s->s, -1); -} - -static void jl_reinit_item(jl_value_t *v, int how, arraylist_t *tracee_list) -{ - JL_TRY { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // reinsert module v into parent (const) - jl_module_t *mod = (jl_module_t*)v; - if (mod->parent == mod) // top level modules handled by loader - break; - jl_binding_t *b = jl_get_binding_wr(mod->parent, mod->name, 1); // this can throw - jl_declare_constant(b); // this can also throw - if (b->value != NULL) { - if (!jl_is_module(b->value)) { - jl_errorf("Invalid redefinition of constant %s.", - jl_symbol_name(mod->name)); // this also throws - } - if (jl_generating_output() && jl_options.incremental) { - jl_errorf("Cannot replace module %s during incremental precompile.", jl_symbol_name(mod->name)); - } - jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(mod->name)); - } - b->value = v; - jl_gc_wb_binding(b, v); - break; - } - case 3: { // rehash MethodTable - jl_methtable_t *mt = (jl_methtable_t*)v; - if (tracee_list) - arraylist_push(tracee_list, mt); - break; - } - default: - assert(0 && "corrupt deserialization state"); - abort(); - } - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: error while reinitializing value "); - jl_static_show((JL_STREAM*)STDERR_FILENO, v); - jl_printf((JL_STREAM*)STDERR_FILENO, ":\n"); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - -static jl_array_t *jl_finalize_deserializer(jl_serializer_state *s, arraylist_t *tracee_list) -{ - jl_array_t *init_order = 
(jl_array_t*)jl_deserialize_value(s, NULL); - - // run reinitialization functions - int pos = read_int32(s->s); - while (pos != -1) { - jl_reinit_item((jl_value_t*)backref_list.items[pos], read_int32(s->s), tracee_list); - pos = read_int32(s->s); - } - return init_order; -} - -JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) -{ - int i, l = jl_array_len(init_order); - for (i = 0; i < l; i++) { - jl_value_t *mod = jl_array_ptr_ref(init_order, i); - if (!jl_generating_output() || jl_options.incremental) { - jl_module_run_initializer((jl_module_t*)mod); - } - else { - if (jl_module_init_order == NULL) - jl_module_init_order = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(jl_module_init_order, mod); - } - } -} - - -// --- entry points --- - -// Register array of newly-inferred MethodInstances -// This gets called as the first step of Base.include_package_for_output -JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) -{ - assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); - newly_inferred = (jl_array_t*) _newly_inferred; -} - -JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* linfo) -{ - JL_LOCK(&newly_inferred_mutex); - size_t end = jl_array_len(newly_inferred); - jl_array_grow_end(newly_inferred, 1); - jl_arrayset(newly_inferred, linfo, end); - JL_UNLOCK(&newly_inferred_mutex); -} - -// Serialize the modules in `worklist` to file `fname` -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) -{ - JL_TIMING(SAVE_MODULE); - jl_task_t *ct = jl_current_task; - ios_t f; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", fname); - return 1; - } - - jl_array_t *mod_array = NULL, *udeps = NULL; - jl_array_t *extext_methods = NULL, *mi_list = NULL; - jl_array_t *ext_targets = NULL, *edges = NULL; - JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &mi_list, &ext_targets, &edges, &edges_map); - - mod_array = jl_get_loaded_modules(); // 
__toplevel__ modules loaded in this session (from Base.loaded_modules_array) - assert(jl_precompile_toplevel_module == NULL); - jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - - serializer_worklist = worklist; - write_header(&f); - // write description of contents (name, uuid, buildid) - write_work_list(&f); - // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist - // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. - // Also write Preferences. - int64_t srctextpos = write_dependency_list(&f, &udeps); // srctextpos: position of srctext entry in header index (update later) - // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) - // this can return errors during deserialize, - // best to keep it early (before any actual initialization) - write_mod_list(&f, mod_array); - - arraylist_new(&reinit_list, 0); - htable_new(&backref_table, 5000); - htable_new(&external_mis, newly_inferred ? jl_array_len(newly_inferred) : 0); - ptrhash_put(&backref_table, jl_main_module, (char*)HT_NOTFOUND + 1); - backref_table_numel = 1; - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; - jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; - jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point - - // Save the inferred code from newly inferred, external methods - mi_list = queue_external_mis(newly_inferred); - - edges_map = jl_alloc_vec_any(0); - extext_methods = jl_alloc_vec_any(0); // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist - size_t i, len = jl_array_len(mod_array); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); - assert(jl_is_module(m)); - if (m->parent == m) // some toplevel modules (really just Base) aren't actually - jl_collect_extext_methods_from_mod(extext_methods, m); - } - jl_collect_methtable_from_mod(extext_methods, jl_type_type_mt); - jl_collect_missing_backedges(jl_type_type_mt); - jl_collect_methtable_from_mod(extext_methods, jl_nonfunction_mt); - jl_collect_missing_backedges(jl_nonfunction_mt); - // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in edges_map. - // Process this to extract `edges` and `ext_targets`. - ext_targets = jl_alloc_vec_any(0); // [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods - // ordinary dispatch: invokesig=NULL, callee is MethodInstance - // `invoke` dispatch: invokesig is signature, callee is MethodInstance - // abstract call: callee is signature - edges = jl_alloc_vec_any(0); // [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods - jl_collect_edges(edges, ext_targets); - - jl_serializer_state s = { - &f, - jl_current_task->ptls, - mod_array - }; - jl_serialize_value(&s, worklist); // serialize module-owned items (those accessible from the bindings table) - jl_serialize_value(&s, extext_methods); // serialize new worklist-owned methods for external functions - - // The next three allow us to restore code instances, if still valid - jl_serialize_value(&s, mi_list); - jl_serialize_value(&s, edges); - jl_serialize_value(&s, ext_targets); - jl_finalize_serializer(&s); - serializer_worklist = NULL; - - htable_free(&backref_table); - htable_free(&external_mis); - arraylist_free(&reinit_list); - - jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point - - // Write the source-text for the dependent files - if (udeps) { - // Go back and update the source-text position to point to the current position - int64_t posfile = ios_pos(&f); - ios_seek(&f, srctextpos); - write_uint64(&f, posfile); - ios_seek_end(&f); - // Each source-text file is written as - // int32: length of abspath - // char*: abspath - // uint64: length of src text - // char*: src text - // At the end we write int32(0) as a terminal sentinel. 
- len = jl_array_len(udeps); - ios_t srctext; - for (i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depmod = jl_fieldref(deptuple, 0); // module - // Dependencies declared with `include_dependency` are excluded - // because these may not be Julia code (and could be huge) - if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); - if (!depstr[0]) - continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); - if (!srctp) { - jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); - continue; - } - size_t slen = jl_string_len(dep); - write_int32(&f, slen); - ios_write(&f, depstr, slen); - posfile = ios_pos(&f); - write_uint64(&f, 0); // placeholder for length of this file in bytes - uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext); - ios_close(&srctext); - ios_seek(&f, posfile); - write_uint64(&f, filelen); - ios_seek_end(&f); - } - } - } - write_int32(&f, 0); // mark the end of the source text - ios_close(&f); - JL_GC_POP(); - jl_precompile_toplevel_module = NULL; - - return 0; -} - -#ifndef JL_NDEBUG -// skip the performance optimizations of jl_types_equal and just use subtyping directly -// one of these types is invalid - that's why we're doing the recache type operation -static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) -{ - return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); -} -STATIC_INLINE jl_value_t *verify_type(jl_value_t *v) JL_NOTSAFEPOINT -{ - assert(v && jl_typeof(v) && jl_typeof(jl_typeof(v)) == (jl_value_t*)jl_datatype_type); - return v; -} -#endif - - -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED; - -static jl_value_t *recache_type(jl_value_t *p) JL_GC_DISABLED -{ - if (jl_is_datatype(p)) { - jl_datatype_t *pdt = (jl_datatype_t*)p; - if (ptrhash_get(&uniquing_table, 
p) != HT_NOTFOUND) { - p = (jl_value_t*)recache_datatype(pdt); - } - else { - jl_svec_t *tt = pdt->parameters; - // ensure all type parameters are recached - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - ptrhash_put(&uniquing_table, p, p); // ensures this algorithm isn't too exponential - } - } - else if (jl_is_typevar(p)) { - jl_tvar_t *ptv = (jl_tvar_t*)p; - ptv->lb = recache_type(ptv->lb); - ptv->ub = recache_type(ptv->ub); - } - else if (jl_is_uniontype(p)) { - jl_uniontype_t *pu = (jl_uniontype_t*)p; - pu->a = recache_type(pu->a); - pu->b = recache_type(pu->b); - } - else if (jl_is_unionall(p)) { - jl_unionall_t *pa = (jl_unionall_t*)p; - pa->var = (jl_tvar_t*)recache_type((jl_value_t*)pa->var); - pa->body = recache_type(pa->body); - } - else { - jl_datatype_t *pt = (jl_datatype_t*)jl_typeof(p); - jl_datatype_t *cachep = recache_datatype(pt); - if (cachep->instance) - p = cachep->instance; - else if (pt != cachep) - jl_set_typeof(p, cachep); - } - return p; -} - -// Extract pre-existing datatypes from cache, and insert new types into cache -// insertions also update uniquing_table -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED -{ - jl_datatype_t *t; // the type after unique'ing - assert(verify_type((jl_value_t*)dt)); - t = (jl_datatype_t*)ptrhash_get(&uniquing_table, dt); - if (t == HT_NOTFOUND) - return dt; - if (t != NULL) - return t; - - jl_svec_t *tt = dt->parameters; - // recache all type parameters - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - - // then recache the type itself - if (jl_svec_len(tt) == 0) { // jl_cache_type doesn't work if length(parameters) == 0 - t = dt; - } - else { - t = jl_lookup_cache_type_(dt); - if (t == NULL) { - jl_cache_type_(dt); - t = dt; - } - assert(t->hash == dt->hash); - assert(jl_invalid_types_equal(t, dt)); - } - ptrhash_put(&uniquing_table, dt, t); - return 
t; -} - -// Recache everything from flagref_list except methods and method instances -// Cleans out any handled items so that anything left in flagref_list still needs future processing -static void jl_recache_types(void) JL_GC_DISABLED -{ - size_t i; - // first rewrite all the unique'd objects - for (i = 0; i < flagref_list.len; i += 2) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - if (!jl_is_method(o) && !jl_is_method_instance(o)) { - jl_datatype_t *dt; - jl_value_t *v; - if (jl_is_datatype(o)) { - dt = (jl_datatype_t*)o; - v = dt->instance; - } - else { - dt = (jl_datatype_t*)jl_typeof(o); - v = o; - } - jl_datatype_t *t = recache_datatype(dt); // get or create cached type (also updates uniquing_table) - if ((jl_value_t*)dt == o && t != dt) { - assert(!type_in_worklist(dt)); - if (loc) - *loc = (jl_value_t*)t; - if (offs > 0) - backref_list.items[offs] = t; - } - if (v == o && t->instance != v) { - assert(t->instance); - assert(loc); - *loc = t->instance; - if (offs > 0) - backref_list.items[offs] = t->instance; - } - } - } - // invalidate the old datatypes to help catch errors - for (i = 0; i < uniquing_table.size; i += 2) { - jl_datatype_t *o = (jl_datatype_t*)uniquing_table.table[i]; // deserialized ref - jl_datatype_t *t = (jl_datatype_t*)uniquing_table.table[i + 1]; // the real type - if (o != t) { - assert(t != NULL && jl_is_datatype(o)); - if (t->instance != o->instance) - jl_set_typeof(o->instance, (void*)(intptr_t)0x20); - jl_set_typeof(o, (void*)(intptr_t)0x10); - } - } - // then do a cleanup pass to drop these from future iterations of flagref_list - i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? 
*loc : (jl_value_t*)backref_list.items[offs]; - if (jl_is_method(o) || jl_is_method_instance(o)) { - i += 2; - } - else { - // delete this item from the flagref list, so it won't be re-encountered later - flagref_list.len -= 2; - if (i >= flagref_list.len) - break; - flagref_list.items[i + 0] = flagref_list.items[flagref_list.len + 0]; // move end-of-list here (executes a `reverse()`) - flagref_list.items[i + 1] = flagref_list.items[flagref_list.len + 1]; - } - } -} - -// look up a method from a previously deserialized dependent module -static jl_method_t *jl_lookup_method(jl_methtable_t *mt, jl_datatype_t *sig, size_t world) -{ - if (world < jl_main_module->primary_world) - world = jl_main_module->primary_world; - struct jl_typemap_assoc search = {(jl_value_t*)sig, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0); - return (jl_method_t*)entry->func.value; -} - -static jl_method_t *jl_recache_method(jl_method_t *m) -{ - assert(!m->is_for_opaque_closure); - assert(jl_is_method(m)); - jl_datatype_t *sig = (jl_datatype_t*)m->sig; - jl_methtable_t *mt = jl_method_get_table(m); - assert((jl_value_t*)mt != jl_nothing); - jl_set_typeof(m, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors - return jl_lookup_method(mt, sig, m->module->primary_world); -} - -static jl_value_t *jl_recache_other_(jl_value_t *o); - -static jl_method_instance_t *jl_recache_method_instance(jl_method_instance_t *mi) -{ - jl_method_t *m = mi->def.method; - m = (jl_method_t*)jl_recache_other_((jl_value_t*)m); - assert(jl_is_method(m)); - jl_datatype_t *argtypes = (jl_datatype_t*)mi->specTypes; - jl_set_typeof(mi, (void*)(intptr_t)0x40); // invalidate the old value to help catch errors - jl_svec_t *env = jl_emptysvec; - jl_value_t *ti = jl_type_intersection_env((jl_value_t*)argtypes, (jl_value_t*)m->sig, &env); - //assert(ti != jl_bottom_type); (void)ti; - if (ti == jl_bottom_type) - env = 
jl_emptysvec; // the intersection may fail now if the type system had made an incorrect subtype env in the past - jl_method_instance_t *_new = jl_specializations_get_linfo(m, (jl_value_t*)argtypes, env); - return _new; -} - -static jl_value_t *jl_recache_other_(jl_value_t *o) -{ - jl_value_t *newo = (jl_value_t*)ptrhash_get(&uniquing_table, o); - if (newo != HT_NOTFOUND) - return newo; - if (jl_is_method(o)) { - // lookup the real Method based on the placeholder sig - newo = (jl_value_t*)jl_recache_method((jl_method_t*)o); - ptrhash_put(&uniquing_table, newo, newo); - } - else if (jl_is_method_instance(o)) { - // lookup the real MethodInstance based on the placeholder specTypes - newo = (jl_value_t*)jl_recache_method_instance((jl_method_instance_t*)o); - } - else { - abort(); - } - ptrhash_put(&uniquing_table, o, newo); - return newo; -} - -static void jl_recache_other(void) -{ - size_t i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - i += 2; - jl_value_t *newo = jl_recache_other_(o); - if (loc) - *loc = newo; - if (offs > 0) - backref_list.items[offs] = newo; - } - flagref_list.len = 0; -} - -// Wait to copy roots until recaching is done -// This is because recaching requires that all pointers to methods and methodinstances -// stay at their source location as recorded by flagref_list. Once recaching is complete, -// they can be safely copied over. 
-static void jl_copy_roots(void) -{ - size_t i, j, l; - for (i = 0; i < queued_method_roots.size; i+=2) { - jl_method_t *m = (jl_method_t*)queued_method_roots.table[i]; - m = (jl_method_t*)ptrhash_get(&uniquing_table, m); - jl_svec_t *keyroots = (jl_svec_t*)queued_method_roots.table[i+1]; - if (keyroots != HT_NOTFOUND) { - uint64_t key = (uint64_t)(uintptr_t)jl_svec_ref(keyroots, 0) | ((uint64_t)(uintptr_t)jl_svec_ref(keyroots, 1) << 32); - jl_array_t *roots = (jl_array_t*)jl_svec_ref(keyroots, 2); - assert(jl_is_array(roots)); - l = jl_array_len(roots); - for (j = 0; j < l; j++) { - jl_value_t *r = jl_array_ptr_ref(roots, j); - jl_value_t *newr = (jl_value_t*)ptrhash_get(&uniquing_table, r); - if (newr != HT_NOTFOUND) { - jl_array_ptr_set(roots, j, newr); - } - } - jl_append_method_roots(m, key, roots); - } - } -} - -static int trace_method(jl_typemap_entry_t *entry, void *closure) -{ - jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)entry->func.method); - return 1; -} - -// Restore module(s) from a cache file f -static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) -{ - JL_TIMING(LOAD_MODULE); - jl_task_t *ct = jl_current_task; - if (ios_eof(f) || !jl_read_verify_header(f)) { - ios_close(f); - return jl_get_exceptionf(jl_errorexception_type, - "Precompile file header verification checks failed."); - } - { // skip past the mod list - size_t len; - while ((len = read_int32(f))) - ios_skip(f, len + 3 * sizeof(uint64_t)); - } - { // skip past the dependency list - size_t deplen = read_uint64(f); - ios_skip(f, deplen); - } - - jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - // verify that the system state is valid - jl_value_t *verify_fail = read_verify_mod_list(f, mod_array); - if (verify_fail) { - ios_close(f); - return verify_fail; - } - - // prepare to deserialize - int en = jl_gc_enable(0); - jl_gc_enable_finalizers(ct, 0); - jl_atomic_fetch_add(&jl_world_counter, 1); // reserve a world age for the deserialization - - arraylist_new(&backref_list, 4000); - arraylist_push(&backref_list, jl_main_module); - arraylist_new(&flagref_list, 0); - htable_new(&queued_method_roots, 0); - htable_new(&new_code_instance_validate, 0); - arraylist_new(&ccallable_list, 0); - htable_new(&uniquing_table, 0); - - jl_serializer_state s = { - f, - ct->ptls, - mod_array - }; - jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored); - serializer_worklist = restored; - assert(jl_typeis((jl_value_t*)restored, jl_array_any_type)); - - // See explanation in jl_save_incremental for variables of the same names - jl_value_t *extext_methods = jl_deserialize_value(&s, &extext_methods); - jl_value_t *mi_list = jl_deserialize_value(&s, &mi_list); // reload MIs stored by queue_external_mis - jl_value_t *edges = jl_deserialize_value(&s, &edges); - jl_value_t *ext_targets = jl_deserialize_value(&s, &ext_targets); - - arraylist_t *tracee_list = NULL; - if (jl_newmeth_tracer) // debugging - tracee_list = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); - - // at this point, the AST is fully reconstructed, but still completely disconnected - // now all of the interconnects will be created - jl_recache_types(); // make all of the types identities correct - jl_insert_methods((jl_array_t*)extext_methods); // hook up extension methods for external generic functions (needs to be after recache types) 
- jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods) - jl_copy_roots(); // copying new roots of external methods (must wait until recaching is complete) - htable_free(&uniquing_table); - jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache) - if (init_order == NULL) - init_order = (jl_array_t*)jl_an_empty_vec_any; - assert(jl_typeis((jl_value_t*)init_order, jl_array_any_type)); - - JL_GC_PUSH5(&init_order, &restored, &edges, &ext_targets, &mi_list); - jl_gc_enable(en); // subtyping can allocate a lot, not valid before recache-other - - jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)mi_list); // restore external backedges (needs to be last) - // check new CodeInstances and validate any that lack external backedges - validate_new_code_instances(); - - serializer_worklist = NULL; - htable_free(&new_code_instance_validate); - arraylist_free(&flagref_list); - arraylist_free(&backref_list); - htable_free(&queued_method_roots); - ios_close(f); - - jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point - if (tracee_list) { - jl_methtable_t *mt; - while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) { - JL_GC_PROMISE_ROOTED(mt); - jl_typemap_visitor(mt->defs, trace_method, NULL); - } - arraylist_free(tracee_list); - free(tracee_list); - } - for (int i = 0; i < ccallable_list.len; i++) { - jl_svec_t *item = (jl_svec_t*)ccallable_list.items[i]; - JL_GC_PROMISE_ROOTED(item); - int success = jl_compile_extern_c(NULL, NULL, NULL, jl_svecref(item, 0), jl_svecref(item, 1)); - if (!success) - jl_safe_printf("@ccallable was already defined for this method name\n"); - } - arraylist_free(&ccallable_list); - jl_value_t *ret = (jl_value_t*)jl_svec(2, restored, init_order); - JL_GC_POP(); - - return (jl_value_t*)ret; -} - -JL_DLLEXPORT jl_value_t 
*jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *mod_array) -{ - ios_t f; - ios_static_buffer(&f, (char*)buf, sz); - return _jl_restore_incremental(&f, mod_array); -} - -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *mod_array) -{ - ios_t f; - if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { - return jl_get_exceptionf(jl_errorexception_type, - "Cache file \"%s\" not found.\n", fname); - } - return _jl_restore_incremental(&f, mod_array); -} - -// --- init --- - -void jl_init_serializer(void) -{ - jl_task_t *ct = jl_current_task; - htable_new(&ser_tag, 0); - htable_new(&common_symbol_tag, 0); - htable_new(&backref_table, 0); - - void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, - jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), - jl_an_empty_string, jl_an_empty_vec_any, - - // empirical list of very common symbols - #include "common_symbols1.inc" - - jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), - jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), - jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), - jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), - jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), - jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), - jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), - - jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), - jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), - jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), - jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), - jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), - jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), - jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), - - jl_bool_type, jl_linenumbernode_type, jl_pinode_type, - jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, - jl_pointer_type, jl_abstractarray_type, jl_nothing_type, - jl_vararg_type, - jl_densearray_type, 
jl_function_type, jl_typename_type, - jl_builtin_type, jl_task_type, jl_uniontype_type, - jl_array_any_type, jl_intrinsic_type, - jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type, - jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, - jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type), - jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, - jl_typeofbottom_type, jl_typeofbottom_type->super, - jl_namedtuple_type, jl_array_int32_type, - jl_typedslot_type, jl_uint32_type, jl_uint64_type, - jl_type_type_mt, jl_nonfunction_mt, - jl_opaque_closure_type, - - ct->ptls->root_task, - - NULL }; - - // more common symbols, less common than those above. will get 2-byte encodings. - void *common_symbols[] = { - #include "common_symbols2.inc" - NULL - }; - - deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; - deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; - deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; - deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; - deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; - deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; - deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; - deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; - deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; - deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; - deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; - deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; - deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; - deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; - deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; - deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; - deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; - deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; - deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; - deser_tag[TAG_UNIONALL] = 
(jl_value_t*)jl_unionall_type; - deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; - deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; - deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; - deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; - deser_tag[TAG_ARGUMENT] = (jl_value_t*)jl_argument_type; - - intptr_t i = 0; - while (vals[i] != NULL) { - deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; - i += 1; - } - assert(LAST_TAG+1+i < 256); - - for (i = 2; i < 256; i++) { - if (deser_tag[i]) - ptrhash_put(&ser_tag, deser_tag[i], (void*)i); - } - - i = 2; - while (common_symbols[i-2] != NULL) { - ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); - deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; - i += 1; - } - assert(i <= 256); -} - -#ifdef __cplusplus -} -#endif diff --git a/src/gc.c b/src/gc.c index 212a4b4d691a4b..0fa2077f4edaf7 100644 --- a/src/gc.c +++ b/src/gc.c @@ -173,6 +173,11 @@ jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) +{ + return jl_buff_tag; +} pagetable_t memory_map; @@ -1759,14 +1764,6 @@ JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) static void *volatile gc_findval; // for usage from gdb, for finding the gc-root for a value #endif -static void *sysimg_base; -static void *sysimg_end; -void jl_gc_set_permalloc_region(void *start, void *end) -{ - sysimg_base = start; - sysimg_end = end; -} - // Handle the case where the stack is only partially copied. 
STATIC_INLINE uintptr_t gc_get_stack_addr(void *_addr, uintptr_t offset, @@ -2551,7 +2548,7 @@ module_binding: { jl_binding_t *b = *begin; if (b == (jl_binding_t*)HT_NOTFOUND) continue; - if ((void*)b >= sysimg_base && (void*)b < sysimg_end) { + if (jl_object_in_image((jl_value_t*)b)) { jl_taggedvalue_t *buf = jl_astaggedvalue(b); uintptr_t tag = buf->header; uint8_t bits; @@ -2676,7 +2673,7 @@ mark: { jl_datatype_t *vt = (jl_datatype_t*)tag; int foreign_alloc = 0; int update_meta = __likely(!meta_updated && !gc_verifying); - if (update_meta && (void*)o >= sysimg_base && (void*)o < sysimg_end) { + if (update_meta && jl_object_in_image(new_obj)) { foreign_alloc = 1; update_meta = 0; } @@ -3025,6 +3022,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); + if (jl_build_ids != NULL) + gc_mark_queue_obj(gc_cache, sp, jl_build_ids); // constants gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type); @@ -4089,8 +4088,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { - static_assert(jl_buff_tag % GC_PAGE_SZ == 0, - "jl_buff_tag must be a multiple of GC_PAGE_SZ"); if (jl_is_initialized()) { int result = jl_atomic_fetch_or(&support_conservative_marking, 1); if (!result) { @@ -4197,8 +4194,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) valid_object: // We have to treat objects with type `jl_buff_tag` differently, // as they must not be passed to the usual marking functions. - // Note that jl_buff_tag is a multiple of GC_PAGE_SZ, thus it - // cannot be a type reference. + // Note that jl_buff_tag is a real pointer into libjulia, + // thus it cannot be a type reference. 
if ((cell->header & ~(uintptr_t) 3) == jl_buff_tag) return NULL; return jl_valueof(cell); diff --git a/src/gf.c b/src/gf.c index 0e98f2a140d4a1..d9bb6994e8ea71 100644 --- a/src/gf.c +++ b/src/gf.c @@ -459,7 +459,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) return 1; } -static int foreach_mtable_in_module( +int foreach_mtable_in_module( jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env) diff --git a/src/init.c b/src/init.c index 926aa050629260..89f4153ff15384 100644 --- a/src/init.c +++ b/src/init.c @@ -783,6 +783,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_install_default_signal_handlers(); jl_gc_init(); + + arraylist_new(&jl_linkage_blobs, 0); + arraylist_new(&jl_image_relocs, 0); + jl_ptls_t ptls = jl_init_threadtls(0); #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 @@ -808,7 +812,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_restore_system_image(jl_options.image_file); } else { jl_init_types(); - jl_global_roots_table = jl_alloc_vec_any(16); + jl_global_roots_table = jl_alloc_vec_any(0); jl_init_codegen(); } diff --git a/src/ircode.c b/src/ircode.c index 1c857051217d03..9f71d8e8dd28cc 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -29,6 +29,34 @@ typedef struct { uint8_t relocatability; } jl_ircode_state; +// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) +static htable_t ser_tag; +// tag => type mapping, the reverse of ser_tag +static jl_value_t *deser_tag[256]; +// hash of some common symbols, encoded as CommonSym_tag plus 1 byte +static htable_t common_symbol_tag; +static jl_value_t *deser_symbols[256]; + +void *jl_lookup_ser_tag(jl_value_t *v) +{ + return ptrhash_get(&ser_tag, v); +} + +void *jl_lookup_common_symbol(jl_value_t *v) +{ + return ptrhash_get(&common_symbol_tag, v); +} + +jl_value_t *jl_deser_tag(uint8_t tag) +{ + return deser_tag[tag]; +} + +jl_value_t *jl_deser_symbol(uint8_t tag) +{ + 
return deser_symbols[tag]; +} + // --- encoding --- #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) @@ -1020,6 +1048,110 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i) return jl_nothing; } +void jl_init_serializer(void) +{ + jl_task_t *ct = jl_current_task; + htable_new(&ser_tag, 0); + htable_new(&common_symbol_tag, 0); + + void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, + jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), + jl_an_empty_string, jl_an_empty_vec_any, + + // empirical list of very common symbols + #include "common_symbols1.inc" + + jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), + jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), + jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), + jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), + jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), + jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), + jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), + + jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), + jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), + jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), + jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), + jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), + jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), + jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), + + jl_bool_type, jl_linenumbernode_type, jl_pinode_type, + jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, + jl_pointer_type, jl_abstractarray_type, jl_nothing_type, + jl_vararg_type, + jl_densearray_type, jl_function_type, jl_typename_type, + jl_builtin_type, jl_task_type, jl_uniontype_type, + jl_array_any_type, jl_intrinsic_type, + jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type, + jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, + jl_array_symbol_type, jl_anytuple_type, 
jl_tparam0(jl_anytuple_type), + jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, + jl_typeofbottom_type, jl_typeofbottom_type->super, + jl_namedtuple_type, jl_array_int32_type, + jl_typedslot_type, jl_uint32_type, jl_uint64_type, + jl_type_type_mt, jl_nonfunction_mt, + jl_opaque_closure_type, + + ct->ptls->root_task, + + NULL }; + + // more common symbols, less common than those above. will get 2-byte encodings. + void *common_symbols[] = { + #include "common_symbols2.inc" + NULL + }; + + deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; + deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; + deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; + deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; + deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; + deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; + deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; + deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; + deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; + deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; + deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; + deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; + deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; + deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; + deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; + deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; + deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; + deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; + deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; + deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type; + deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; + deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; + deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; + deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; + deser_tag[TAG_ARGUMENT] = 
(jl_value_t*)jl_argument_type; + + intptr_t i = 0; + while (vals[i] != NULL) { + deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; + i += 1; + } + assert(LAST_TAG+1+i < 256); + + for (i = 2; i < 256; i++) { + if (deser_tag[i]) + ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } + + i = 2; + while (common_symbols[i-2] != NULL) { + ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); + deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; + i += 1; + } + assert(i <= 256); +} + #ifdef __cplusplus } #endif diff --git a/src/jitlayers.h b/src/jitlayers.h index ba38abff0d6f46..77ac5d64bb46d2 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -220,7 +220,6 @@ jl_llvm_functions_t jl_emit_codeinst( enum CompilationPolicy { Default = 0, Extern = 1, - ImagingMode = 2 }; typedef std::map> jl_workqueue_t; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 714998b650e284..f97c9894238591 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -400,7 +400,7 @@ XX(jl_resolve_globals_in_ir) \ XX(jl_restore_excstack) \ XX(jl_restore_incremental) \ - XX(jl_restore_incremental_from_buf) \ + XX(jl_restore_package_image_from_file) \ XX(jl_restore_system_image) \ XX(jl_restore_system_image_data) \ XX(jl_rethrow) \ @@ -408,8 +408,6 @@ XX(jl_rettype_inferred) \ XX(jl_running_on_valgrind) \ XX(jl_safe_printf) \ - XX(jl_save_incremental) \ - XX(jl_save_system_image) \ XX(jl_SC_CLK_TCK) \ XX(jl_set_ARGS) \ XX(jl_set_const) \ @@ -520,6 +518,7 @@ XX(jl_vexceptionf) \ XX(jl_vprintf) \ XX(jl_wakeup_thread) \ + XX(jl_write_compiler_output) \ XX(jl_yield) \ #define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \ @@ -535,7 +534,7 @@ YY(jl_get_llvm_module) \ YY(jl_get_LLVM_VERSION) \ YY(jl_dump_native) \ - YY(jl_get_llvm_gv) \ + YY(jl_get_llvm_gvs) \ YY(jl_dump_function_asm) \ YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ diff --git a/src/julia.expmap b/src/julia.expmap index 41299aa808572a..35cc5eac48b6ac 100644 --- a/src/julia.expmap +++ 
b/src/julia.expmap @@ -5,6 +5,7 @@ asprintf; bitvector_*; ios_*; + arraylist_grow; small_arraylist_grow; jl_*; ijl_*; diff --git a/src/julia.h b/src/julia.h index 981e6a0ee8232e..6d51f6d081da9c 100644 --- a/src/julia.h +++ b/src/julia.h @@ -315,7 +315,7 @@ typedef struct _jl_method_t { jl_array_t *roots; // pointers in generated code (shared to reduce memory), or null // Identify roots by module-of-origin. We only track the module for roots added during incremental compilation. // May be NULL if no external roots have been added, otherwise it's a Vector{UInt64} - jl_array_t *root_blocks; // RLE (build_id, offset) pairs (even/odd indexing) + jl_array_t *root_blocks; // RLE (build_id.lo, offset) pairs (even/odd indexing) int32_t nroots_sysimg; // # of roots stored in the system image jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this @@ -592,7 +592,7 @@ typedef struct _jl_module_t { // hidden fields: htable_t bindings; arraylist_t usings; // modules with all bindings potentially imported - uint64_t build_id; + jl_uuid_t build_id; jl_uuid_t uuid; size_t primary_world; _Atomic(uint32_t) counter; @@ -841,6 +841,7 @@ extern void JL_GC_PUSH3(void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH4(void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH5(void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; +extern void JL_GC_PUSH8(void *, void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT; // This is necessary, because otherwise the analyzer considers this undefined // behavior and terminates the exploration @@ -880,6 +881,9 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT; #define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, 
arg6, arg7}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; +#define JL_GC_PUSH8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(8), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; \ + jl_pgcstack = (jl_gcframe_t*)__gc_stkf; #define JL_GC_PUSHARGS(rts_var,n) \ @@ -1763,15 +1767,12 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void); JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s); JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname); JL_DLLEXPORT void jl_set_sysimg_so(void *handle); -JL_DLLEXPORT ios_t *jl_create_system_image(void *); -JL_DLLEXPORT void jl_save_system_image(const char *fname); +JL_DLLEXPORT ios_t *jl_create_system_image(void *, jl_array_t *worklist); JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); -JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); -JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *linfo); -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist); -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods); -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods); +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete); + +JL_DLLEXPORT void jl_write_compiler_output(void); // parsing JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len, diff --git a/src/julia_internal.h b/src/julia_internal.h index f1929892df5513..6ddfa5d92072c9 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -290,6 +290,9 @@ extern tracer_cb jl_newmeth_tracer; void jl_call_tracer(tracer_cb callback, jl_value_t *tracee); void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; +extern arraylist_t jl_linkage_blobs; // external linkage: 
sysimg/pkgimages +extern jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED; // external linkage: corresponding build_ids +extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages extern JL_DLLEXPORT size_t jl_page_size; extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; @@ -460,9 +463,12 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); # define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) #endif -// jl_buff_tag must be a multiple of GC_PAGE_SZ so that it can't be -// confused for an actual type reference. -#define jl_buff_tag ((uintptr_t)0x4eadc000) +// jl_buff_tag must be an actual pointer here, so it cannot be confused for an actual type reference. +// defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it +const extern uint64_t _jl_buff_tag[3]; +#define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16)) +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); + typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { @@ -608,9 +614,9 @@ void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *ca JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root); void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots); -int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i); -jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index); -int nroots_with_key(jl_method_t *m, uint64_t key); +int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i) JL_NOTSAFEPOINT; +jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index) JL_NOTSAFEPOINT; +int nroots_with_key(jl_method_t *m, uint64_t key) JL_NOTSAFEPOINT; int jl_valid_type_param(jl_value_t *v); @@ -690,6 +696,7 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n); jl_function_t *jl_new_generic_function(jl_sym_t *name, 
jl_module_t *module); jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st); int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env); +int foreach_mtable_in_module(jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env); void jl_init_main_module(void); JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT; jl_array_t *jl_get_loaded_modules(void); @@ -900,7 +907,7 @@ typedef DWORD jl_pgcstack_key_t; #else typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT; #endif -JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k); +JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) JL_NOTSAFEPOINT; #if !defined(_OS_WINDOWS_) && !defined(__APPLE__) && !defined(JL_DISABLE_LIBUNWIND) extern pthread_mutex_t in_signal_lock; @@ -918,7 +925,38 @@ static inline void jl_set_gc_and_wait(void) jl_atomic_store_release(&ct->ptls->gc_state, state); } #endif -void jl_gc_set_permalloc_region(void *start, void *end); + +// Query if a Julia object is in a permalloc region (due to being part of a sys- or pkg-image) +STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT +{ + if (!jl_build_ids) + return 0; + assert(jl_is_array(jl_build_ids)); + return jl_array_len(jl_build_ids); +} + +// TODO: Make this a binary search +STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT { + size_t i, nblobs = n_linkage_blobs(); + assert(jl_linkage_blobs.len == 2*nblobs); + for (i = 0; i < nblobs; i++) { + uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i]; + uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1]; + if (left < (uintptr_t)v && (uintptr_t)v <= right) { + // the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here) + break; + } + } + return i; +} + +STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT
{ + size_t blob = external_blob_index(v); + if (blob == n_linkage_blobs()) { + return 0; + } + return 1; +} typedef struct { LLVMOrcThreadSafeModuleRef TSM; @@ -932,11 +970,11 @@ JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char JL_DLLEXPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); JL_DLLEXPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); -void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy); +void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode); void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len); -int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p) JL_NOTSAFEPOINT; +void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx); @@ -1223,6 +1261,7 @@ extern void *jl_ntdll_handle; extern void *jl_kernel32_handle; extern void *jl_crtdll_handle; extern void *jl_winsock_handle; +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err); @@ -1563,7 +1602,6 @@ void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t * jl_method_instance_t **linfos, size_t n); void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); -void jl_write_compiler_output(void); #if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) # define jl_unreachable() __builtin_unreachable() @@ -1616,6 +1654,8 @@ JL_DLLEXPORT uint16_t 
julia__truncdfhf2(double param) JL_NOTSAFEPOINT; //JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT; //JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len); + #ifdef __cplusplus } #endif diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index f2fdb6f4fd1c86..815ebfe7ed1011 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -306,23 +306,31 @@ static inline std::vector consume_gv(Module &M, const char *name, bool allow // Strip them from the Module so that it's easier to handle the uses. GlobalVariable *gv = M.getGlobalVariable(name); assert(gv && gv->hasInitializer()); - auto *ary = cast(gv->getInitializer()); - unsigned nele = ary->getNumOperands(); + ArrayType *Ty = cast(gv->getInitializer()->getType()); + unsigned nele = Ty->getArrayNumElements(); std::vector res(nele); - unsigned i = 0; - while (i < nele) { - llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); - if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { - // Shouldn't happen in regular use, but can happen in bugpoint. - nele--; - continue; + ConstantArray *ary = nullptr; + if (gv->getInitializer()->isNullValue()) { + for (unsigned i = 0; i < nele; ++i) + res[i] = cast(Constant::getNullValue(Ty->getArrayElementType())); + } + else { + ary = cast(gv->getInitializer()); + unsigned i = 0; + while (i < nele) { + llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); + if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { + // Shouldn't happen in regular use, but can happen in bugpoint. 
+ nele--; + continue; + } + res[i++] = cast(val); } - res[i++] = cast(val); + res.resize(nele); } - res.resize(nele); assert(gv->use_empty()); gv->eraseFromParent(); - if (ary->use_empty()) + if (ary && ary->use_empty()) ary->destroyConstant(); return res; } @@ -935,17 +943,24 @@ Constant *CloneCtx::emit_offset_table(const std::vector &vars, StringRef nam { auto T_int32 = Type::getInt32Ty(M.getContext()); auto T_size = getSizeTy(M.getContext()); - assert(!vars.empty()); - add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, - name + "_base", - ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()), &M)); - auto vbase = ConstantExpr::getPtrToInt(vars[0], T_size); uint32_t nvars = vars.size(); + Constant *base = nullptr; + if (nvars > 0) { + base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()); + add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, + name + "_base", + base, &M)); + } else { + base = ConstantExpr::getNullValue(T_size->getPointerTo()); + } + auto vbase = ConstantExpr::getPtrToInt(base, T_size); std::vector offsets(nvars + 1); offsets[0] = ConstantInt::get(T_int32, nvars); - offsets[1] = ConstantInt::get(T_int32, 0); - for (uint32_t i = 1; i < nvars; i++) - offsets[i + 1] = get_ptrdiff32(vars[i], vbase); + if (nvars > 0) { + offsets[1] = ConstantInt::get(T_int32, 0); + for (uint32_t i = 1; i < nvars; i++) + offsets[i + 1] = get_ptrdiff32(vars[i], vbase); + } ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); add_comdat(new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, diff --git a/src/method.c b/src/method.c index f8fe34f7cffd63..8cb1c1167523bf 100644 --- a/src/method.c +++ b/src/method.c @@ -1188,7 +1188,7 @@ JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_ uint64_t modid = 0; if (mod) { assert(jl_is_module(mod)); - modid = mod->build_id; + modid = mod->build_id.lo; } assert(jl_is_method(m)); prepare_method_for_roots(m, modid); diff 
--git a/src/module.c b/src/module.c index 0dc5e20d18b89e..605bcd3c2b7737 100644 --- a/src/module.c +++ b/src/module.c @@ -23,9 +23,10 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names) m->istopmod = 0; m->uuid = uuid_zero; static unsigned int mcounter; // simple counter backup, in case hrtime is not incrementing - m->build_id = jl_hrtime() + (++mcounter); - if (!m->build_id) - m->build_id++; // build id 0 is invalid + m->build_id.lo = jl_hrtime() + (++mcounter); + if (!m->build_id.lo) + m->build_id.lo++; // build id 0 is invalid + m->build_id.hi = ~(uint64_t)0; m->primary_world = 0; m->counter = 1; m->nospecialize = 0; @@ -936,7 +937,7 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported) JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) { return m->name; } JL_DLLEXPORT jl_module_t *jl_module_parent(jl_module_t *m) { return m->parent; } -JL_DLLEXPORT uint64_t jl_module_build_id(jl_module_t *m) { return m->build_id; } +JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; } JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; } // TODO: make this part of the module constructor and read-only? 
@@ -972,6 +973,22 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m) JL_UNLOCK(&m->lock); } +JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) +{ + int i, l = jl_array_len(init_order); + for (i = 0; i < l; i++) { + jl_value_t *mod = jl_array_ptr_ref(init_order, i); + if (!jl_generating_output() || jl_options.incremental) { + jl_module_run_initializer((jl_module_t*)mod); + } + else { + if (jl_module_init_order == NULL) + jl_module_init_order = jl_alloc_vec_any(0); + jl_array_ptr_1d_push(jl_module_init_order, mod); + } + } +} + #ifdef __cplusplus } #endif diff --git a/src/precompile.c b/src/precompile.c index d5d8416c1097b9..9c9c79b154a324 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -21,17 +21,14 @@ JL_DLLEXPORT int jl_generating_output(void) } static void *jl_precompile(int all); +static void *jl_precompile_worklist(jl_array_t *worklist); -void jl_write_compiler_output(void) +JL_DLLEXPORT void jl_write_compiler_output(void) { if (!jl_generating_output()) { return; } - void *native_code = NULL; - if (!jl_options.incremental) - native_code = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); - if (!jl_module_init_order) { jl_printf(JL_STDERR, "WARNING: --output requested, but no modules defined during run\n"); return; @@ -60,46 +57,51 @@ void jl_write_compiler_output(void) } } + assert(jl_precompile_toplevel_module == NULL); + void *native_code = NULL; + if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { + if (jl_options.incremental) + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + native_code = jl_options.incremental ? 
jl_precompile_worklist(worklist) : jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + if (jl_options.incremental) + jl_precompile_toplevel_module = NULL; + } + if (jl_options.incremental) { - if (jl_options.outputji) - if (jl_save_incremental(jl_options.outputji, worklist)) - jl_exit(1); if (jl_options.outputbc || jl_options.outputunoptbc) jl_printf(JL_STDERR, "WARNING: incremental output to a .bc file is not implemented\n"); - if (jl_options.outputo) - jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); if (jl_options.outputasm) jl_printf(JL_STDERR, "WARNING: incremental output to a .s file is not implemented\n"); + if (jl_options.outputo) { + jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); + } } - else { - ios_t *s = NULL; - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) - s = jl_create_system_image(native_code); - if (jl_options.outputji) { - if (s == NULL) { - jl_save_system_image(jl_options.outputji); - } - else { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); - ios_close(&f); - } - } + ios_t *s = jl_create_system_image(native_code, jl_options.incremental ? 
worklist : NULL); - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { - assert(s); - jl_dump_native(native_code, - jl_options.outputbc, - jl_options.outputunoptbc, - jl_options.outputo, - jl_options.outputasm, - (const char*)s->buf, (size_t)s->size); - jl_postoutput_hook(); - } + if (jl_options.outputji) { + ios_t f; + if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); + ios_write(&f, (const char*)s->buf, (size_t)s->size); + ios_close(&f); } + + if (native_code) { + jl_dump_native(native_code, + jl_options.outputbc, + jl_options.outputunoptbc, + jl_options.outputo, + jl_options.outputasm, + (const char*)s->buf, (size_t)s->size); + jl_postoutput_hook(); + } + + if (s) { + ios_close(s); + free(s); + } + for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); @@ -340,16 +342,11 @@ static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); } -static void *jl_precompile(int all) +static void *jl_precompile_(jl_array_t *m) { - // array of MethodInstances and ccallable aliases to include in the output - jl_array_t *m = jl_alloc_vec_any(0); jl_array_t *m2 = NULL; jl_method_instance_t *mi = NULL; - JL_GC_PUSH3(&m, &m2, &mi); - if (all) - jl_compile_all_defs(m); - jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + JL_GC_PUSH2(&m2, &mi); m2 = jl_alloc_vec_any(0); for (size_t i = 0; i < jl_array_len(m); i++) { jl_value_t *item = jl_array_ptr_ref(m, i); @@ -368,8 +365,39 @@ static void *jl_precompile(int all) jl_array_ptr_1d_push(m2, item); } } - m = NULL; - void *native_code = jl_create_native(m2, NULL, NULL, 0); + void *native_code = jl_create_native(m2, NULL, NULL, 0, 1); + 
JL_GC_POP(); + return native_code; +} + +static void *jl_precompile(int all) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + if (all) + jl_compile_all_defs(m); + jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + void *native_code = jl_precompile_(m); + JL_GC_POP(); + return native_code; +} + +static void *jl_precompile_worklist(jl_array_t *worklist) +{ + if (!worklist) + return NULL; + // this "found" array will contain function + // type signatures that were inferred but haven't been compiled + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + size_t i, nw = jl_array_len(worklist); + for (i = 0; i < nw; i++) { + jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + assert(jl_is_module(mod)); + foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); + } + void *native_code = jl_precompile_(m); JL_GC_POP(); return native_code; } diff --git a/src/processor.cpp b/src/processor.cpp index b9dfc2b7f0b4e6..df114b4d802575 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -627,10 +627,14 @@ static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) // .data base char *data_base; - jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 1); + if (!jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 0)) { + data_base = NULL; + } // .text base char *text_base; - jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 1); + if (!jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 0)) { + text_base = NULL; + } res.base = text_base; int32_t *offsets; @@ -713,6 +717,7 @@ static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) if (reloc_idx == idx) { found = true; auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]); + assert(slot); *slot = offset + res.base; } else if (reloc_idx > idx) { diff --git a/src/processor.h b/src/processor.h index 4b9071fb4f663f..ac00f8874141bd 
100644 --- a/src/processor.h +++ b/src/processor.h @@ -166,6 +166,7 @@ typedef struct _jl_sysimg_fptrs_t { * Return the data about the function pointers selected. */ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl); +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl); // Return the name of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index eaa950662d0dea..f7a112993e3e54 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1586,6 +1586,20 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1795,6 +1809,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 1f314eb460f0f2..3160bd0ba67506 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -51,6 +51,22 @@ static uint32_t sysimg_init_cb(const void *id) return best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData<1> target = jit_targets.front(); + // Find the last name match or use the default one. 
+ uint32_t best_idx = 0; + auto pkgimg = deserialize_target_data<1>((const uint8_t*)id); + for (uint32_t i = 0; i < pkgimg.size(); i++) { + auto &imgt = pkgimg[i]; + if (imgt.name == target.name) { + best_idx = i; + } + } + + return best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -103,6 +119,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 77ee5afaf5e853..b73838a55777e6 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -878,6 +878,19 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1018,6 +1031,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git 
a/src/rtutils.c b/src/rtutils.c index 497b348f871d54..f34303b9aeea53 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -708,6 +708,12 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_static_show_x(out, (jl_value_t*)vt, depth); n += jl_printf(out, ">"); } + else if (vt == (jl_datatype_t*)jl_buff_tag) { + n += jl_printf(out, "", (void*)v); + } + else if (vt == (jl_datatype_t*)(uintptr_t)(0xbabababababababaull & ~15)) { + n += jl_printf(out, "", (void*)v); + } // These need to be special cased because they // exist only by pointer identity in early startup else if (v == (jl_value_t*)jl_simplevector_type) { diff --git a/src/staticdata.c b/src/staticdata.c index ff958b0d3c30f2..e1f0f86aa68fc3 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -4,33 +4,24 @@ /* saving and restoring system images - This performs serialization and deserialization of in-memory data. The dump.c file is similar, but has less complete coverage: - dump.c has no knowledge of native code (and simply discards it), whereas this supports native code caching in .o files. - Duplication is avoided by elevating the .o-serialized versions of global variables and native-compiled functions to become - the authoritative source for such entities in the system image, with references to these objects appropriately inserted into - the (de)serialized version of Julia's internal data. This makes deserialization simple and fast: we only need to deal with - pointer relocation, registering with the garbage collector, and making note of special internal types. During serialization, - we also need to pay special attention to things like builtin functions, C-implemented types (those in jltypes.c), the metadata - for documentation, optimal layouts, integration with native system image generation, and preparing other preprocessing - directives. - - dump.c has capabilities missing from this serializer, most notably the ability to handle external references. 
This is not needed - for system images as they are self-contained. However, it would be needed to support incremental compilation of packages. + This performs serialization and deserialization of system and package images. It creates and saves a compact binary + blob, making deserialization "simple" and fast: we "only" need to deal with uniquing, pointer relocation, + method root insertion, registering with the garbage collector, making note of special internal types, and + backedges/invalidation. Special objects include things like builtin functions, C-implemented types (those in jltypes.c), + the metadata for documentation, optimal layouts, integration with native system image generation, and preparing other + preprocessing directives. During serialization, the flow has several steps: - - step 1 inserts relevant items into `backref_table`, an `obj` => `id::Int` mapping. `id` is assigned by - order of insertion. This is effectively a recursive traversal, singling out items like pointers and symbols - that need restoration when the system image is loaded. This stage is implemented by `jl_serialize_value` - and its callees; while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked + - step 1 inserts relevant items into `serialization_order`, an `obj` => `id::Int` mapping. `id` is assigned by + order of insertion. This stage is implemented by `jl_queue_for_serialization` and its callees; + while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked using a work-queue managed by `jl_serialize_reachable`. - It's worth emphasizing that despite the name `jl_serialize_value`, the only goal of this stage is to - insert objects into `backref_table`. The entire system gets inserted, either directly or indirectly via - fields of other objects. Objects requiring pointer relocation or gc registration must be inserted directly. - In later stages, such objects get referenced by their `id`. 
+ It's worth emphasizing that the only goal of this stage is to insert objects into `serialization_order`. + In later stages, such objects get written in order of `id`. - - step 2 (the biggest of four steps) takes all items in `backref_table` and actually serializes them ordered + - step 2 (the biggest of four steps) takes all items in `serialization_order` and actually serializes them ordered by `id`. The system is serialized into several distinct streams (see `jl_serializer_state`), a "main stream" (the `s` field) as well as parallel streams for writing specific categories of additional internal data (e.g., global data invisible to codegen, as well as deserialization "touch-up" tables, see below). These different streams @@ -47,14 +38,36 @@ one of the corresponding categorical list, then `index = t << RELOC_TAG_OFFSET + i`. The simplest source for the details of this encoding can be found in the pair of functions `get_reloc_for_item` and `get_item_for_reloc`. + `uniquing` also holds the serialized location of external DataTypes, MethodInstances, and singletons + in the serialized blob (i.e., new-at-the-time-of-serialization specializations). + Most of step 2 is handled by `jl_write_values`, followed by special handling of the dedicated parallel streams. - step 3 combines the different sections (fields of `jl_serializer_state`) into one - - step 4 writes the values of the hard-coded tagged items and `reinit_list`/`ccallable_list` - -The tables written to the serializer stream make deserialization fairly straightforward. Much of the "real work" is -done by `get_item_for_reloc`. + - step 4 writes the values of the hard-coded tagged items and `ccallable_list` + +Much of the "real work" during deserialization is done by `get_item_for_reloc`. 
But a few items require specific +attention: +- uniquing: during deserialization, the target item (an "external" type or MethodInstance) must be checked against + the running system to see whether such an object already exists (i.e., whether some other previously-loaded package + or workload has created such types/MethodInstances previously) or whether it needs to be created de-novo. + In either case, all references at `location` must be updated to the one in the running system. + `new_dt_objs` is a hash set of newly allocated datatype-reachable objects +- method root insertion: when new specializations generate new roots, these roots must be inserted into + method root tables +- backedges & invalidation: external edges have to be checked against the running system and any invalidations executed. + +Encoding of a pointer: +- in the location of the pointer, we initially write zero padding +- for both relocs_list and gctags_list, we write loc/backrefid (for gctags_list this is handled by the caller of write_gctaggedfield, + for relocs_list it's handled by write_pointerfield) +- when writing to disk, both call get_reloc_for_item, and its return value (subject to modification by gc bits) + ends up being written into the data stream (s->s), and the data stream's position written to s->relocs + +External links: +- location holds the offset +- loc/0 in relocs_list */ #include @@ -75,6 +88,8 @@ done by `get_item_for_reloc`. 
#include "valgrind.h" #include "julia_assert.h" +#include "staticdata_utils.c" + #ifdef __cplusplus extern "C" { #endif @@ -272,23 +287,27 @@ static uintptr_t nsym_tag; // array of definitions for the predefined tagged object types // (reverse of symbol_table) static arraylist_t deser_sym; - -// table of all objects that are serialized -static htable_t backref_table; -static int backref_table_numel; -static arraylist_t layout_table; // cache of `position(s)` for each `id` in `backref_table` +// Predefined tags that do not have special handling in `externally_linked` +static htable_t external_objects; + +static htable_t serialization_order; // to break cycles, mark all objects that are serialized +static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them. +static htable_t nullptrs; +static htable_t bindings; // because they are not first-class objects +// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization +// must be "toplevel" in this queue. For types, parameters and field types must appear +// before the "wrapper" type so they can be properly recached against the running system. 
+static arraylist_t serialization_queue; +static arraylist_t layout_table; // cache of `position(s)` for each `id` in `serialization_order` static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_reachable -// Both `reinit_list` and `ccallable_list` are lists of (size_t pos, code) entries -// for the serializer to mark values in need of rework during deserialization -// codes: -// 1: typename (reinit_list) -// 2: module (reinit_list) -// 3: method (ccallable_list) -static arraylist_t reinit_list; - -// @ccallable entry points to install -static arraylist_t ccallable_list; +// Permanent list of void* (begin, end+1) pairs of system/package images we've loaded previously +// together with their module build_ids (used for external linkage) +// jl_linkage_blobs.items[2i:2i+1] correspond to jl_build_ids[i] (0-offset indexing) +// TODO: Keep this sorted so that we can use binary-search +arraylist_t jl_linkage_blobs; +arraylist_t jl_image_relocs; +jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED = NULL; // hash of definitions for predefined function pointers static htable_t fptr_to_id; @@ -297,7 +316,12 @@ void *native_functions; // opaque jl_native_code_desc_t blob used for fetching // table of struct field addresses to rewrite during saving static htable_t field_replace; -static htable_t layout_cache; +typedef struct { + uint64_t base; + uintptr_t *gvars_base; + int32_t *gvars_offsets; + jl_sysimg_fptrs_t fptrs; +} jl_image_t; // array of definitions for the predefined function pointers // (reverse of fptr_to_id) @@ -326,26 +350,42 @@ typedef struct { ios_t *fptr_record; // serialized array mapping fptrid => spos arraylist_t relocs_list; // a list of (location, target) pairs, see description at top arraylist_t gctags_list; // " + arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated + arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated + arraylist_t 
fixup_types; // a list of locations of types requiring (re)caching + arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching + arraylist_t ccallable_list; // @ccallable entry points to install + // record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg + // conceptually, the base pointer for the jth externally-linked item is determined from + // i = findfirst(==(link_ids[j]), jl_build_ids) + // blob_base = jl_linkage_blobs.items[2i] # 0-offset indexing + // We need separate lists since they are intermingled at creation but split when written. + jl_array_t *link_ids_relocs; + jl_array_t *link_ids_gctags; + jl_array_t *link_ids_gvars; jl_ptls_t ptls; + htable_t callers_with_edges; + jl_image_t *image; + int8_t incremental; } jl_serializer_state; static jl_value_t *jl_idtable_type = NULL; static jl_typename_t *jl_idtable_typename = NULL; static jl_value_t *jl_bigint_type = NULL; static int gmp_limb_size = 0; - static jl_sym_t *jl_docmeta_sym = NULL; // Tags of category `t` are located at offsets `t << RELOC_TAG_OFFSET` // Consequently there is room for 2^RELOC_TAG_OFFSET pointers, etc enum RefTags { - DataRef, // mutable data - ConstDataRef, // constant data (e.g., layouts) - TagRef, // items serialized via their tags - SymbolRef, // symbols - BindingRef, // module bindings - FunctionRef, // generic functions - BuiltinFunctionRef // builtin functions + DataRef, // mutable data + ConstDataRef, // constant data (e.g., layouts) + TagRef, // items serialized via their tags + SymbolRef, // symbols + BindingRef, // module bindings + FunctionRef, // generic functions + BuiltinFunctionRef, // builtin functions + ExternalLinkage // items defined externally (used when serializing packages) }; // calling conventions for internal entry points. 
@@ -384,17 +424,29 @@ static void write_reloc_t(ios_t *s, uintptr_t reloc_id) JL_NOTSAFEPOINT } } -// --- Static Compile --- +static int jl_is_binding(uintptr_t v) JL_NOTSAFEPOINT +{ + return jl_typeis(v, (jl_datatype_t*)jl_buff_tag); +} + +// Reporting to PkgCacheInspector +typedef struct { + size_t sysdata; + size_t isbitsdata; + size_t symboldata; + size_t tagslist; + size_t reloclist; + size_t gvarlist; + size_t fptrlist; +} pkgcachesizes; +// --- Static Compile --- static void *jl_sysimg_handle = NULL; -static uint64_t sysimage_base = 0; -static uintptr_t *sysimg_gvars_base = NULL; -static const int32_t *sysimg_gvars_offsets = NULL; -static jl_sysimg_fptrs_t sysimg_fptrs; +static jl_image_t sysimage; -static inline uintptr_t *sysimg_gvars(uintptr_t *base, size_t idx) +static inline uintptr_t *sysimg_gvars(uintptr_t *base, int32_t *offsets, size_t idx) { - return base + sysimg_gvars_offsets[idx] / sizeof(base[0]); + return base + offsets[idx] / sizeof(base[0]); } JL_DLLEXPORT int jl_running_on_valgrind(void) @@ -407,10 +459,10 @@ static void jl_load_sysimg_so(void) int imaging_mode = jl_generating_output() && !jl_options.incremental; // in --build mode only use sysimg data, not precompiled native code if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1); - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1); - sysimg_gvars_offsets += 1; - assert(sysimg_fptrs.base); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimage.gvars_base, 1); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimage.gvars_offsets, 1); + sysimage.gvars_offsets += 1; + assert(sysimage.fptrs.base); void *pgcstack_func_slot; jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1); @@ -423,19 +475,19 @@ static void jl_load_sysimg_so(void) *tls_offset_idx = 
(uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); #ifdef _OS_WINDOWS_ - sysimage_base = (intptr_t)jl_sysimg_handle; + sysimage.base = (intptr_t)jl_sysimg_handle; #else Dl_info dlinfo; - if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { - sysimage_base = (intptr_t)dlinfo.dli_fbase; + if (dladdr((void*)sysimage.gvars_base, &dlinfo) != 0) { + sysimage.base = (intptr_t)dlinfo.dli_fbase; } else { - sysimage_base = 0; + sysimage.base = 0; } #endif } else { - memset(&sysimg_fptrs, 0, sizeof(sysimg_fptrs)); + memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs)); } const char *sysimg_data; jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); @@ -447,6 +499,94 @@ static void jl_load_sysimg_so(void) // --- serializer --- +#define NBOX_C 1024 + +static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) +{ + // ignore items that are given a special relocation representation + if (s->incremental && jl_object_in_image(v)) + return 0; + + if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { + return 0; + } + else if (jl_typeis(v, jl_int64_type)) { + int64_t i64 = *(int64_t*)v + NBOX_C / 2; + if ((uint64_t)i64 < NBOX_C) + return 0; + } + else if (jl_typeis(v, jl_int32_type)) { + int32_t i32 = *(int32_t*)v + NBOX_C / 2; + if ((uint32_t)i32 < NBOX_C) + return 0; + } + else if (jl_typeis(v, jl_uint8_type)) { + return 0; + } + else if (jl_typeis(v, jl_task_type)) { + return 0; + } + + return 1; +} + + +static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_value_t *m = mi->def.value; + if (jl_is_method(m) && jl_object_in_image(m)) + return 1 + type_in_worklist(mi->specTypes); + } + if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (jl_is_tuple_type(dt) ? 
!dt->isconcretetype : dt->hasfreetypevars) + return 0; // aka !is_cacheable from jltypes.c + if (jl_object_in_image((jl_value_t*)dt->name)) + return 1 + type_in_worklist(v); + } + jl_value_t *dtv = jl_typeof(v); + if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) { + return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image + } + return 0; +} + +static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT +{ + return caching_tag(v) == 2; +} + +static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT +{ + assert(!jl_object_in_image(v)); + return caching_tag(v) == 1; +} + +static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT +{ + ptrhash_put(&field_replace, (void*)addr, newval); +} + +static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED +{ + jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr); + if (fld == HT_NOTFOUND) { + fld = *addr; + if (mutabl && fld && jl_is_cpointer_type(jl_typeof(fld)) && jl_unbox_voidpointer(fld) != NULL && jl_unbox_voidpointer(fld) != (void*)(uintptr_t)-1) { + void **nullval = ptrhash_bp(&nullptrs, (void*)jl_typeof(fld)); + if (*nullval == HT_NOTFOUND) { + void *C_NULL = NULL; + *nullval = (void*)jl_new_bits(jl_typeof(fld), &C_NULL); + } + fld = (jl_value_t*)*nullval; + } + return fld; + } + return fld; +} + static uintptr_t jl_fptr_id(void *fptr) { void **pbp = ptrhash_bp(&fptr_to_id, fptr); @@ -456,113 +596,126 @@ static uintptr_t jl_fptr_id(void *fptr) return *(uintptr_t*)pbp; } -#define jl_serialize_value(s, v) jl_serialize_value_(s,(jl_value_t*)(v),1) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive); +// `jl_queue_for_serialization` adds items to `serialization_order` +#define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0) +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate); -static void 
jl_serialize_module(jl_serializer_state *s, jl_module_t *m) +static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) { - jl_serialize_value(s, m->name); - jl_serialize_value(s, m->parent); + jl_queue_for_serialization(s, m->name); + jl_queue_for_serialization(s, m->parent); size_t i; void **table = m->bindings.table; for (i = 0; i < m->bindings.size; i += 2) { if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); + jl_queue_for_serialization(s, (jl_value_t*)table[i]); jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); + ptrhash_put(&bindings, b, (void*)(uintptr_t)-1); + jl_queue_for_serialization(s, b->name); + jl_value_t *value; if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - jl_serialize_value(s, jl_nothing); + value = jl_nothing; else - jl_serialize_value(s, jl_atomic_load_relaxed(&b->value)); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); + value = get_replaceable_field((jl_value_t**)&b->value, !b->constp); + jl_queue_for_serialization(s, value); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&b->globalref)); + jl_queue_for_serialization(s, b->owner); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&b->ty)); } } for (i = 0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); + jl_queue_for_serialization(s, (jl_value_t*)m->usings.items[i]); } } -static jl_value_t *get_replaceable_field(jl_value_t **addr) +// Anything that requires uniquing or fixing during deserialization needs to be "toplevel" +// in serialization (i.e., have its own entry in `serialization_order`). Consequently, +// objects that act as containers for other potentially-"problematic" objects must add such "children" +// to the queue. +// Most objects use preorder traversal. 
But things that need uniquing require postorder: +// you want to handle uniquing of `Dict{String,Float64}` before you tackle `Vector{Dict{String,Float64}}`. +// Uniquing is done in `serialization_order`, so the very first mention of such an object must +// be the "source" rather than merely a cross-reference. +static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) { - jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr); - if (fld == HT_NOTFOUND) - return *addr; - return fld; -} - -#define NBOX_C 1024 - -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive) -{ - // ignore items that are given a special representation - if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { - return; - } - else if (jl_typeis(v, jl_task_type)) { - if (v == (jl_value_t*)s->ptls->root_task) { - jl_serialize_value(s, ((jl_task_t*)v)->tls); - return; + jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); + jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate); + + if (!recursive) + goto done_fields; + + if (s->incremental && jl_is_datatype(v) && immediate) { + jl_datatype_t *dt = (jl_datatype_t*)v; + // ensure super is queued (though possibly not yet handled, since it may have cycles) + jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1); + // ensure all type parameters are recached + jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); + jl_value_t *singleton = dt->instance; + if (singleton && needs_uniquing(singleton)) { + assert(jl_needs_serialization(s, singleton)); // should be true, since we visited dt + // do not visit dt->instance for our template object as it leads to unwanted cycles here + // (it may get serialized from elsewhere though) + record_field_change(&dt->instance, jl_nothing); + } + immediate = 0; // do not handle remaining fields immediately (just field types remain) + } + if (s->incremental && jl_is_method_instance(v)) { + if 
(needs_uniquing(v)) { + // we only need 3 specific fields of this (the rest are not used) + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_queue_for_serialization(s, mi->def.value); + jl_queue_for_serialization(s, mi->specTypes); + jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals); + recursive = 0; + goto done_fields; + } + else if (needs_recaching(v)) { + // we only need 3 specific fields of this (the rest are restored afterward, if valid) + jl_method_instance_t *mi = (jl_method_instance_t*)v; + record_field_change((jl_value_t**)&mi->uninferred, NULL); + record_field_change((jl_value_t**)&mi->backedges, NULL); + record_field_change((jl_value_t**)&mi->callbacks, NULL); + record_field_change((jl_value_t**)&mi->cache, NULL); } } - else if (jl_typeis(v, jl_int64_type)) { - int64_t i64 = *(int64_t*)v + NBOX_C / 2; - if ((uint64_t)i64 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_int32_type)) { - int32_t i32 = *(int32_t*)v + NBOX_C / 2; - if ((uint32_t)i32 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_uint8_type)) { - return; - } - arraylist_push(&object_worklist, (void*)((uintptr_t)v | recursive)); -} - -static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recursive) -{ - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - return; + if (jl_is_typename(v)) { + jl_typename_t *tn = (jl_typename_t*)v; + // don't recurse into several fields (yet) + jl_queue_for_serialization_(s, (jl_value_t*)tn->cache, 0, 1); + jl_queue_for_serialization_(s, (jl_value_t*)tn->linearcache, 0, 1); + if (s->incremental) { + assert(!jl_object_in_image((jl_value_t*)tn->module)); + assert(!jl_object_in_image((jl_value_t*)tn->wrapper)); + } } - size_t item = ++backref_table_numel; - assert(item < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); - char *pos = (char*)HT_NOTFOUND + item; - *bp = (void*)pos; - - // some values have special representations - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - 
jl_serialize_value(s, t); + if (immediate) // must be things that can be recursively handled, and valid as type parameters + assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); - if (t->layout->npointers == 0) { - // skip it + const jl_datatype_layout_t *layout = t->layout; + if (layout->npointers == 0) { + // bitstypes do not require recursion } else if (jl_is_svec(v)) { - if (!recursive) - return; size_t i, l = jl_svec_len(v); jl_value_t **data = jl_svec_data(v); for (i = 0; i < l; i++) { - jl_serialize_value(s, data[i]); + jl_queue_for_serialization_(s, data[i], 1, immediate); } } else if (jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; - jl_serialize_value(s, jl_typeof(ar)); + const char *data = (const char*)jl_array_data(ar); if (ar->flags.ptrarray) { size_t i, l = jl_array_len(ar); for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_array_ptr_ref(ar, i)); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[i], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } } else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); uint16_t elsz = ar->elsize; size_t i, l = jl_array_len(ar); jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar)); @@ -570,46 +723,90 @@ static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recu for (i = 0; i < l; i++) { for (j = 0; j < np; j++) { uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t *fld = ((jl_value_t**)data)[ptr]; - JL_GC_PROMISE_ROOTED(fld); - jl_serialize_value(s, fld); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } data += elsz; } } } else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); + jl_queue_module_for_serialization(s, (jl_module_t*)v); } - else if (jl_is_typename(v)) { - jl_typename_t *tn = (jl_typename_t*)v; - jl_serialize_value(s, tn->name); - jl_serialize_value(s, tn->module); - 
jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->Typeofwrapper); - jl_serialize_value_(s, (jl_value_t*)tn->cache, 0); - jl_serialize_value_(s, (jl_value_t*)tn->linearcache, 0); - jl_serialize_value(s, tn->mt); - jl_serialize_value(s, tn->partial); - } - else if (t->layout->nfields > 0) { - if (jl_typeis(v, jl_globalref_type)) { - // Don't save the cached binding reference in staticdata - ((jl_globalref_t*)v)->bnd_cache = NULL; - } + else if (layout->nfields > 0) { char *data = (char*)jl_data_ptr(v); - size_t i, np = t->layout->npointers; + size_t i, np = layout->npointers; for (i = 0; i < np; i++) { uint32_t ptr = jl_ptr_offset(t, i); - jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]); - jl_serialize_value(s, fld); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], t->name->mutabl); + jl_queue_for_serialization_(s, fld, 1, immediate); } } + +done_fields: ; + + // We've encountered an item we need to cache + void **bp = ptrhash_bp(&serialization_order, v); + assert(*bp != (void*)(uintptr_t)-1); + if (s->incremental) { + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + assert(*bp == (void*)(uintptr_t)-2); + else if (*bp != (void*)(uintptr_t)-2) + return; + } + else { + assert(*bp == (void*)(uintptr_t)-2); + } + arraylist_push(&serialization_queue, (void*) v); + size_t idx = serialization_queue.len - 1; + assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); + + *bp = (void*)((char*)HT_NOTFOUND + 1 + idx); +} + +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) +{ + if (!jl_needs_serialization(s, v)) + return; + + jl_value_t *t = jl_typeof(v); + // Items that require postorder traversal must visit their children prior to insertion into + // the worklist/serialization_order (and also before their first use) + if (s->incremental && !immediate) { + if 
(jl_is_datatype(t) && needs_uniquing(v)) + immediate = 1; + if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v)) + immediate = 1; + } + + void **bp = ptrhash_bp(&serialization_order, v); + if (*bp == HT_NOTFOUND) { + *bp = (void*)(uintptr_t)(immediate ? -2 : -1); + } + else { + if (!s->incremental || !immediate || !recursive) + return; + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + *bp2 = v; // now is unique_ready + else { + assert(*bp != (void*)(uintptr_t)-1); + return; // already was unique_ready + } + assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then + if (*bp == (void*)(uintptr_t)-1) + *bp = (void*)(uintptr_t)-2; // now immediate + } + + if (immediate) + jl_insert_into_serialization_queue(s, v, recursive, immediate); + else + arraylist_push(&object_worklist, (void*)v); } // Do a pre-order traversal of the to-serialize worklist, in the identical order -// to the calls to jl_serialize_value would occur in a purely recursive +// to the calls to jl_queue_for_serialization would occur in a purely recursive // implementation, but without potentially running out of stack. 
static void jl_serialize_reachable(jl_serializer_state *s) { @@ -624,10 +821,16 @@ static void jl_serialize_reachable(jl_serializer_state *s) object_worklist.items[j] = tmp; } prevlen = --object_worklist.len; - uintptr_t v = (uintptr_t)object_worklist.items[prevlen]; - int recursive = v & 1; - v &= ~(uintptr_t)1; // untag v - jl_serialize_value__(s, (jl_value_t*)v, recursive); + jl_value_t *v = (jl_value_t*)object_worklist.items[prevlen]; + void **bp = ptrhash_bp(&serialization_order, (void*)v); + assert(*bp != HT_NOTFOUND && *bp != (void*)(uintptr_t)-2); + if (*bp == (void*)(uintptr_t)-1) { // might have been eagerly handled for post-order while in the lazy pre-order queue + *bp = (void*)(uintptr_t)-2; + jl_insert_into_serialization_queue(s, v, 1, 0); + } + else { + assert(s->incremental); + } } } @@ -641,19 +844,6 @@ static void ios_ensureroom(ios_t *s, size_t newsize) JL_NOTSAFEPOINT } } -// Maybe encode a global variable. `gid` is the LLVM index, 0 if the object is not serialized -// in the generated code (and thus not a gvar from that standpoint, maybe only stored in the internal-data sysimg). -// `reloc_id` is the RefTags-encoded `target`. -static void record_gvar(jl_serializer_state *s, int gid, uintptr_t reloc_id) JL_NOTSAFEPOINT -{ - if (gid == 0) - return; - ios_ensureroom(s->gvar_record, gid * sizeof(reloc_t)); - ios_seek(s->gvar_record, (gid - 1) * sizeof(reloc_t)); - write_reloc_t(s->gvar_record, reloc_id); -} - - static void write_padding(ios_t *s, size_t nb) JL_NOTSAFEPOINT { static const char zeros[16] = {0}; @@ -672,11 +862,34 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT write_uint(s, 0); } -// Return the integer `id` for `v`. 
Generically this is looked up in `backref_table`, +// Records the buildid holding `v` and returns the tagged offset within the corresponding image +static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) { + size_t i = external_blob_index(v); + if (i < n_linkage_blobs()) { + assert(link_ids && jl_is_array(link_ids)); + assert(jl_build_ids && jl_is_array(jl_build_ids)); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + // We found the sysimg/pkg that this item links against + // Store the image key in `link_ids` + jl_array_grow_end(link_ids, 1); + uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); + link_id_data[jl_array_len(link_ids)-1] = build_id_data[i]; + // Compute the relocation code + size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i]; + offset /= sizeof(void*); + assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large"); + // jl_printf(JL_STDOUT, "External link %ld against blob %d with key %ld at position 0x%lx with offset 0x%lx to \n", jl_array_len(link_ids), i, build_id_data[i>>1], ios_pos(s->s), offset); + // jl_(v); + return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset; + } + return 0; +} + +// Return the integer `id` for `v`. Generically this is looked up in `serialization_order`, // but symbols, small integers, and a couple of special items (`nothing` and the root Task) // have special handling. 
-#define backref_id(s, v) _backref_id(s, (jl_value_t*)(v)) -static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPOINT +#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids) +static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT { assert(v != NULL && "cannot get backref to NULL object"); void *idx = HT_NOTFOUND; @@ -713,21 +926,44 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO uint8_t u8 = *(uint8_t*)v; return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C; } + if (s->incremental && jl_object_in_image(v)) { + assert(link_ids); + uintptr_t item = add_external_linkage(s, v, link_ids); + assert(item && "no external linkage identified"); + return item; + } if (idx == HT_NOTFOUND) { - idx = ptrhash_get(&backref_table, v); - assert(idx != HT_NOTFOUND && "object missed during jl_serialize_value pass"); + idx = ptrhash_get(&serialization_order, v); + if (idx == HT_NOTFOUND) { + jl_(jl_typeof(v)); + jl_(v); + } + assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass"); + assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass"); + assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass"); } return (char*)idx - 1 - (char*)HT_NOTFOUND; } +static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT +{ + if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { + if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); + else + arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset); + } +} + // Save blank space in stream `s` for a pointer `fld`, storing both location and target // in `relocs_list`. 
static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSAFEPOINT { if (fld != NULL) { arraylist_push(&s->relocs_list, (void*)(uintptr_t)ios_pos(s->s)); - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); + record_uniquing(s, fld, ios_pos(s->s)); } write_pointer(s->s); } @@ -736,26 +972,29 @@ static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSA // in `gctags_list`. static void write_gctaggedfield(jl_serializer_state *s, uintptr_t ref) JL_NOTSAFEPOINT { + // jl_printf(JL_STDOUT, "gctaggedfield: position %p, value 0x%lx\n", (void*)(uintptr_t)ios_pos(s->s), ref); arraylist_push(&s->gctags_list, (void*)(uintptr_t)ios_pos(s->s)); arraylist_push(&s->gctags_list, (void*)ref); write_pointer(s->s); } // Special handling from `jl_write_values` for modules -static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) +static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) JL_GC_DISABLED { size_t reloc_offset = ios_pos(s->s); size_t tot = sizeof(jl_module_t); ios_write(s->s, (char*)m, tot); // raw memory dump of the `jl_module_t` structure + // will need to recreate the binding table for this + arraylist_push(&s->fixup_objs, (void*)reloc_offset); // Handle the fields requiring special attention jl_module_t *newm = (jl_module_t*)&s->s->buf[reloc_offset]; newm->name = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, name))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name, s->link_ids_relocs)); newm->parent = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent, s->link_ids_relocs)); newm->primary_world = 
jl_atomic_load_acquire(&jl_world_counter); // write out the bindings table as a list @@ -772,13 +1011,14 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t write_gctaggedfield(s, (uintptr_t)BindingRef << RELOC_TAG_OFFSET); tot += sizeof(void*); size_t binding_reloc_offset = ios_pos(s->s); - record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b), - ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset); + ptrhash_put(&bindings, b, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset)); write_pointerfield(s, (jl_value_t*)b->name); + jl_value_t *value; if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - write_pointerfield(s, jl_nothing); + value = jl_nothing; else - write_pointerfield(s, jl_atomic_load_relaxed(&b->value)); + value = get_replaceable_field((jl_value_t**)&b->value, !b->constp); + write_pointerfield(s, value); write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref)); write_pointerfield(s, (jl_value_t*)b->owner); write_pointerfield(s, jl_atomic_load_relaxed(&b->ty)); @@ -803,7 +1043,7 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t size_t i; for (i = 0; i < m->usings.len; i++) { arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[i]))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i])); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i], s->link_ids_relocs)); } } else { @@ -822,92 +1062,74 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t } } -#if 0 -static size_t jl_sort_size(jl_datatype_t *dt) +static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT { - if (dt == jl_simplevector_type) - return SIZE_MAX - 5; - if (dt == jl_string_type) - return SIZE_MAX - 4; - if (dt->name == jl_array_typename) - return SIZE_MAX - 3; - if (dt == jl_datatype_type) - return SIZE_MAX - 2; 
- if (dt == jl_module_type) - return SIZE_MAX - 1; - return jl_datatype_size(dt); -} -#endif - -// Used by `qsort` to order `backref_table` by `id` -static int sysimg_sort_order(const void *pa, const void *pb) -{ - uintptr_t sa = ((uintptr_t*)pa)[1]; - uintptr_t sb = ((uintptr_t*)pb)[1]; - return (sa > sb ? 1 : (sa < sb ? -1 : 0)); -#if 0 - jl_value_t *a = *(jl_value_t**)pa; - jl_datatype_t *tya = (jl_datatype_t*)jl_typeof(a); - size_t sa = jl_sort_size(tya); - jl_value_t *b = *(jl_value_t**)pb; - jl_datatype_t *tyb = (jl_datatype_t*)jl_typeof(b); - size_t sb = jl_sort_size(tyb); - if (sa == sb) { - sa = tya->uid; - sb = tyb->uid; - } - return (sa > sb ? 1 : (sa < sb ? -1 : 0)); -#endif + for (size_t i = 0; i < globals->len; i++) { + void *g = globals->items[i]; + if (jl_is_binding((uintptr_t)g)) { + if (!ptrhash_has(&bindings, g)) { + // need to deal with foreign bindings here too + assert(s->incremental); + jl_binding_t *b = (jl_binding_t*)g; + jl_value_t *gr = jl_module_globalref(b->owner, b->name); + jl_queue_for_serialization(s, gr); + } + continue; + } + assert(!ptrhash_has(&bindings, g)); + jl_queue_for_serialization(s, g); + } } jl_value_t *jl_find_ptr = NULL; -// The main function for serializing all the items queued in `backref_table` -static void jl_write_values(jl_serializer_state *s) +// The main function for serializing all the items queued in `serialization_order` +// (They are also stored in `serialization_queue` which is order-preserving, unlike the hash table used +// for `serialization_order`). 
+static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED { - arraylist_t objects_list; - arraylist_new(&objects_list, backref_table_numel * 2); + size_t l = serialization_queue.len; arraylist_new(&layout_table, 0); - arraylist_grow(&layout_table, backref_table_numel); - memset(layout_table.items, 0, backref_table_numel * sizeof(void*)); - - // Order `backref_table` by `id` - size_t i, len = backref_table.size; - void **p = backref_table.table; - for (i = 0; i < len; i += 2) { - char *reloc_id = (char*)p[i + 1]; - if (reloc_id != HT_NOTFOUND) { - jl_value_t *v = (jl_value_t*)p[i]; - uintptr_t item = reloc_id - 1 - (char*)HT_NOTFOUND; - objects_list.items[objects_list.len++] = (void*)v; - objects_list.items[objects_list.len++] = (void*)item; - } - } - assert(backref_table_numel * 2 == objects_list.len); - qsort(objects_list.items, backref_table_numel, sizeof(void*) * 2, sysimg_sort_order); + arraylist_grow(&layout_table, l * 2); + memset(layout_table.items, 0, l * 2 * sizeof(void*)); // Serialize all entries - for (i = 0, len = backref_table_numel * 2; i < len; i += 2) { - jl_value_t *v = (jl_value_t*)objects_list.items[i]; // the object + for (size_t item = 0; item < l; item++) { + jl_value_t *v = (jl_value_t*)serialization_queue.items[item]; // the object JL_GC_PROMISE_ROOTED(v); - uintptr_t item = (uintptr_t)objects_list.items[i + 1]; // the id + assert(!(s->incremental && jl_object_in_image(v))); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption"); // realign stream to expected gc alignment (16 bytes) uintptr_t skip_header_pos = ios_pos(s->s) + sizeof(jl_taggedvalue_t); write_padding(s->s, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); + // write header - write_gctaggedfield(s, backref_id(s, t)); + if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) + arraylist_push(&s->uniquing_types, 
(void*)(uintptr_t)(ios_pos(s->s)|1)); + write_gctaggedfield(s, backref_id(s, t, s->link_ids_gctags)); size_t reloc_offset = ios_pos(s->s); assert(item < layout_table.len && layout_table.items[item] == NULL); - layout_table.items[item] = (void*)reloc_offset; // store the inverse mapping of `backref_table` (`id` => object) - record_gvar(s, jl_get_llvm_gv(native_functions, v), ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + reloc_offset); + layout_table.items[item] = (void*)reloc_offset; // store the inverse mapping of `serialization_order` (`id` => object-as-streampos) + + if (s->incremental && needs_uniquing(v)) { + if (jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + write_pointerfield(s, mi->def.value); + write_pointerfield(s, mi->specTypes); + write_pointerfield(s, (jl_value_t*)mi->sparam_vals); + continue; + } + else if (!jl_is_datatype(v)) { + assert(jl_is_datatype_singleton(t) && "unreachable"); + } + } + else if (s->incremental && needs_recaching(v)) { + arraylist_push(jl_is_datatype(v) ? 
&s->fixup_types : &s->fixup_objs, (void*)reloc_offset); + } // write data - if (jl_is_cpointer(v)) { - write_pointer(s->s); - } - else if (jl_is_array(v)) { + if (jl_is_array(v)) { // Internal data for types in julia.h with `jl_array_t` field(s) #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) jl_array_t *ar = (jl_array_t*)v; @@ -948,10 +1170,15 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) + const intptr_t *data = (const intptr_t*)jl_array_data(ar); size_t i; - for (i = 0; i < alen; i++) - write_pointer(s->const_data); + for (i = 0; i < alen; i++) { + if (data[i] != -1) + write_pointer(s->const_data); + else + ios_write(s->const_data, (char*)&data[i], sizeof(data[i])); + } } else { if (isbitsunion) { @@ -967,11 +1194,11 @@ static void jl_write_values(jl_serializer_state *s) // Pointer eltypes are encoded in the mutable data section size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt); size_t padding_amt = data - ios_pos(s->s); - write_padding(s->s, padding_amt); headersize += padding_amt; newa->data = (void*)headersize; // relocation offset arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target + write_padding(s->s, padding_amt); if (ar->flags.hasptr) { // copy all of the data first const char *data = (const char*)jl_array_data(ar); @@ -983,22 +1210,22 @@ static void jl_write_values(jl_serializer_state *s) for (i = 0; i < alen; i++) { for (j = 0; j < np; j++) { size_t offset = i * elsz + 
jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*); - jl_value_t *fld = *(jl_value_t**)&data[offset]; + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1); + size_t fld_pos = reloc_offset + headersize + offset; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none) - } - else { - assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL); + arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); } + memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } } } else { + jl_value_t **data = (jl_value_t**)jl_array_data(ar); size_t i; for (i = 0; i < alen; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); + jl_value_t *e = get_replaceable_field(&data[i], 1); write_pointerfield(s, e); } } @@ -1006,19 +1233,16 @@ static void jl_write_values(jl_serializer_state *s) } else if (jl_typeis(v, jl_module_type)) { jl_write_module(s, item, (jl_module_t*)v); - // will need to recreate the binding table for this - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)2); } else if (jl_typeis(v, jl_task_type)) { jl_error("Task cannot be serialized"); } else if (jl_is_svec(v)) { ios_write(s->s, (char*)v, sizeof(void*)); - size_t i, l = jl_svec_len(v); + size_t ii, l = jl_svec_len(v); assert(l > 0 || (jl_svec_t*)v == jl_emptysvec); - for (i = 0; i < l; i++) { - write_pointerfield(s, jl_svecref(v, i)); + for (ii = 0; ii < l; ii++) { + write_pointerfield(s, jl_svecref(v, ii)); } } else if (jl_is_string(v)) { @@ -1026,6 +1250,8 @@ static void 
jl_write_values(jl_serializer_state *s) write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility } else if (jl_datatype_nfields(t) == 0) { + // The object has no fields, so we just snapshot its byte representation + assert(!t->layout->npointers); assert(t->layout->npointers == 0); ios_write(s->s, (char*)v, jl_datatype_size(t)); } @@ -1058,8 +1284,8 @@ static void jl_write_values(jl_serializer_state *s) write_padding(s->s, offset - tot); tot = offset; size_t fsz = jl_field_size(t, i); - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) { - // reset Ptr fields to C_NULL + if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(intptr_t*)slot != -1) { + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) assert(!jl_field_isptr(t, i)); write_pointer(s->s); } @@ -1072,22 +1298,46 @@ static void jl_write_values(jl_serializer_state *s) size_t np = t->layout->npointers; for (i = 0; i < np; i++) { size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*); - jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]); + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], t->name->mutabl); + size_t fld_pos = offset + reloc_offset; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target + arraylist_push(&s->relocs_list, (void*)(uintptr_t)(fld_pos)); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); } - memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none) + memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } // A few objects need additional handling beyond the generic serialization above - if (jl_is_method(v)) { - write_padding(s->s, sizeof(jl_method_t) - tot); - if 
(((jl_method_t*)v)->ccallable) { - arraylist_push(&ccallable_list, (void*)item); - arraylist_push(&ccallable_list, (void*)3); + + if (s->incremental && jl_typeis(v, jl_typemap_entry_type)) { + jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset]; + if (newentry->max_world == ~(size_t)0) { + if (newentry->min_world > 1) { + newentry->min_world = ~(size_t)0; + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } + } + else { + // garbage newentry - delete it :( + newentry->min_world = 1; + newentry->max_world = 0; } } + else if (jl_is_method(v)) { + write_padding(s->s, sizeof(jl_method_t) - tot); // hidden fields + jl_method_t *m = (jl_method_t*)v; + jl_method_t *newm = (jl_method_t*)&s->s->buf[reloc_offset]; + if (s->incremental) { + if (newm->deleted_world != ~(size_t)0) + newm->deleted_world = 1; + else + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + newm->primary_world = ~(size_t)0; + } + if (m->ccallable) + arraylist_push(&s->ccallable_list, (void*)reloc_offset); + } else if (jl_is_method_instance(v)) { jl_method_instance_t *newmi = (jl_method_instance_t*)&s->s->buf[reloc_offset]; newmi->precompiled = 0; @@ -1097,6 +1347,22 @@ static void jl_write_values(jl_serializer_state *s) jl_code_instance_t *m = (jl_code_instance_t*)v; jl_code_instance_t *newm = (jl_code_instance_t*)&s->s->buf[reloc_offset]; + if (s->incremental) { + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + if (m->min_world > 1) + newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization + if (m->max_world != ~(size_t)0) + newm->max_world = 0; + else { + if (m->inferred && ptrhash_has(&s->callers_with_edges, m->def)) + newm->max_world = 1; // sentinel value indicating this will need validation + if (m->min_world > 0 && m->inferred) { + // TODO: also check if this object is part of the codeinst cache + // will check on deserialize if this cache entry is still valid + } + } + } + newm->invoke = NULL; newm->isspecsig = 0; 
newm->specptr.fptr = NULL; @@ -1157,36 +1423,33 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(((uintptr_t)BuiltinFunctionRef << RELOC_TAG_OFFSET) + builtin_id - 2)); // relocation target } } + else if (jl_is_globalref(v)) { + jl_globalref_t *newg = (jl_globalref_t*)&s->s->buf[reloc_offset]; + // Don't save the cached binding reference in staticdata + // TODO: this should be a relocation pointing to the binding in the new image + newg->bnd_cache = NULL; + if (s->incremental) + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } else if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset]; - if (dt->layout != NULL) { - newdt->layout = NULL; + if (dt->layout != NULL) { + size_t nf = dt->layout->nfields; + size_t np = dt->layout->npointers; + size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); char *flddesc = (char*)dt->layout; - void* reloc_from = (void*)(reloc_offset + offsetof(jl_datatype_t, layout)); - void* reloc_to; - - void** bp = ptrhash_bp(&layout_cache, flddesc); - if (*bp == HT_NOTFOUND) { - int64_t streampos = ios_pos(s->const_data); - uintptr_t align = LLT_ALIGN(streampos, sizeof(void*)); - uintptr_t layout = align / sizeof(void*); - *bp = reloc_to = (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout); - - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); - size_t layoutsize = sizeof(jl_datatype_layout_t) + dt->layout->nfields * fieldsize; - if (dt->layout->first_ptr != -1) - layoutsize += dt->layout->npointers << dt->layout->fielddesc_type; - write_padding(s->const_data, align - streampos); - ios_write(s->const_data, flddesc, layoutsize); - } - else { - reloc_to = *bp; - } - - arraylist_push(&s->relocs_list, reloc_from); - arraylist_push(&s->relocs_list, reloc_to); + size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; + if (dt->layout->first_ptr != -1) + fldsize += np << 
dt->layout->fielddesc_type; + uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); + write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream + newdt->layout = NULL; // relocation offset + layout /= sizeof(void*); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_datatype_t, layout))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target + ios_write(s->const_data, flddesc, fldsize); } } else if (jl_is_typename(v)) { @@ -1215,8 +1478,7 @@ static void jl_write_values(jl_serializer_state *s) } else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)1); + arraylist_push(&s->fixup_objs, (void*)reloc_offset); } else { write_padding(s->s, jl_datatype_size(t) - tot); @@ -1225,61 +1487,11 @@ static void jl_write_values(jl_serializer_state *s) } } - -// Record all symbols that get referenced by the generated code -// and queue them for pointer relocation -static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v) -{ - // since symbols are static, they might not have had a - // reference anywhere in the code image other than here - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> RELOC_TAG_OFFSET == SymbolRef); - record_gvar(s, gv, item); - } - if (v->left) - jl_write_gv_syms(s, v->left); - if (v->right) - jl_write_gv_syms(s, v->right); -} - -// Record all hardcoded-tagged items that get referenced by -// the generated code and queue them for pointer relocation -static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v) -{ - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> 
RELOC_TAG_OFFSET == TagRef); - record_gvar(s, gv, item); - } -} -static void jl_write_gv_tagrefs(jl_serializer_state *s) -{ - // this also ensures all objects referenced in the code have - // references in the system image to their global variable - // since codegen knows that some integer boxes are static, - // they might not have had a reference anywhere in the code - // image other than here - size_t i; - jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task); - jl_write_gv_tagref(s, s->ptls->root_task->tls); - jl_write_gv_tagref(s, jl_nothing); - for (i = 0; i < NBOX_C; i++) { - jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2)); - jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2)); - } - for (i = 0; i < 256; i++) { - jl_write_gv_tagref(s, jl_box_uint8(i)); - } -} - // In deserialization, create Symbols and set up the // index for backreferencing static void jl_read_symbols(jl_serializer_state *s) { - assert(deser_sym.len == nsym_tag); + assert(deser_sym.len == 0); uintptr_t base = (uintptr_t)&s->symbols->buf[0]; uintptr_t end = base + s->symbols->size; while (base < end) { @@ -1331,6 +1543,8 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) case FunctionRef: assert(offset < JL_API_MAX && "unknown function pointer id"); break; + case ExternalLinkage: + break; case DataRef: default: assert(0 && "corrupt relocation item id"); @@ -1342,7 +1556,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) } // Compute target location at deserialization -static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id) +static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) { enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET); size_t offset = (reloc_id & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1)); @@ -1380,11 +1594,11 @@ static inline 
uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas case FunctionRef: switch ((jl_callingconv_t)offset) { case JL_API_BOXED: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_args; JL_FALLTHROUGH; case JL_API_WITH_PARAMETERS: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_sparam; return (uintptr_t)NULL; case JL_API_CONST: @@ -1398,17 +1612,35 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas //default: assert("corrupt relocation item id"); } + case ExternalLinkage: + assert(link_ids); + assert(link_index); + assert(jl_build_ids); + uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + assert(0 <= *link_index && *link_index < jl_array_len(link_ids)); + uint64_t build_id = link_id_data[*link_index]; + *link_index += 1; + size_t i = 0, nids = jl_array_len(jl_build_ids); + while (i < nids) { + if (build_id == build_id_data[i]) + break; + i++; + } + assert(i < nids); + assert(2*i < jl_linkage_blobs.len); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); } abort(); } -static void jl_write_reloclist(ios_t *s, char *base, size_t size, arraylist_t *list) +static void jl_write_offsetlist(ios_t *s, char *base, size_t size, arraylist_t *list) { for (size_t i = 0; i < list->len; i += 2) { size_t last_pos = i ? 
(size_t)list->items[i - 2] : 0; size_t pos = (size_t)list->items[i]; - size_t item = (size_t)list->items[i + 1]; + size_t item = (size_t)list->items[i + 1]; // item is tagref-encoded uintptr_t *pv = (uintptr_t*)(base + pos); assert(pos < size && pos != 0); *pv = get_reloc_for_item(item, *pv); @@ -1435,19 +1667,32 @@ static void jl_write_reloclist(ios_t *s, char *base, size_t size, arraylist_t *l } +static void jl_write_arraylist(ios_t *s, arraylist_t *list) +{ + write_uint(s, list->len); + ios_write(s, (const char*)list->items, list->len * sizeof(void*)); +} + static void jl_write_relocations(jl_serializer_state *s) { char *base = &s->s->buf[0]; - jl_write_reloclist(s->relocs, base, s->s->size, &s->gctags_list); - jl_write_reloclist(s->relocs, base, s->s->size, &s->relocs_list); + jl_write_offsetlist(s->relocs, base, s->s->size, &s->gctags_list); + jl_write_offsetlist(s->relocs, base, s->s->size, &s->relocs_list); + if (s->incremental) { + jl_write_arraylist(s->relocs, &s->uniquing_types); + jl_write_arraylist(s->relocs, &s->uniquing_objs); + jl_write_arraylist(s->relocs, &s->fixup_types); + } + jl_write_arraylist(s->relocs, &s->fixup_objs); } -static void jl_read_reloclist(jl_serializer_state *s, uint8_t bits) +static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint8_t bits) { uintptr_t base = (uintptr_t)s->s->buf; size_t size = s->s->size; uintptr_t last_pos = 0; uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); + int link_index = 0; while (1) { // Read the offset of the next object size_t pos_diff = 0; @@ -1469,40 +1714,58 @@ static void jl_read_reloclist(jl_serializer_state *s, uint8_t bits) last_pos = pos; uintptr_t *pv = (uintptr_t *)(base + pos); uintptr_t v = *pv; - v = get_item_for_reloc(s, base, size, v); + v = get_item_for_reloc(s, base, size, v, link_ids, &link_index); *pv = v | bits; } + assert(!link_ids || link_index == jl_array_len(link_ids)); +} + +static void jl_read_arraylist(ios_t *s, arraylist_t *list) +{ 
+ size_t list_len = read_uint(s); + arraylist_new(list, 0); + arraylist_grow(list, list_len); + ios_read(s, (char*)list->items, list_len * sizeof(void*)); } -static char *sysimg_base; -static char *sysimg_relocs; void gc_sweep_sysimg(void) { - if (!sysimg_relocs) + size_t nblobs = n_linkage_blobs(); + if (nblobs == 0) return; - uintptr_t base = (uintptr_t)sysimg_base; - uintptr_t last_pos = 0; - uint8_t *current = (uint8_t *)sysimg_relocs; - while (1) { - // Read the offset of the next object - size_t pos_diff = 0; - size_t cnt = 0; + assert(jl_linkage_blobs.len == 2*nblobs); + assert(jl_image_relocs.len == nblobs); + for (size_t i = 0; i < 2*nblobs; i+=2) { + reloc_t *relocs = (reloc_t*)jl_image_relocs.items[i>>1]; + if (!relocs) + continue; + uintptr_t base = (uintptr_t)jl_linkage_blobs.items[i]; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)relocs; while (1) { - int8_t c = *current++; - pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); - if ((c >> 7) == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + int8_t c = *current++; + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - } - if (pos_diff == 0) - break; - uintptr_t pos = last_pos + pos_diff; - last_pos = pos; - jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); - o->bits.gc = GC_OLD; + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); + o->bits.gc = GC_OLD; + } } } +// jl_write_value and jl_read_value are used for storing Julia objects that are adjuncts to +// the image proper. For example, new methods added to external callables require +// insertion into the appropriate method table. 
#define jl_write_value(s, v) _jl_write_value((s), (jl_value_t*)(v)) static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) { @@ -1510,12 +1773,11 @@ static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) write_reloc_t(s->s, 0); return; } - uintptr_t item = backref_id(s, v); + uintptr_t item = backref_id(s, v, NULL); uintptr_t reloc = get_reloc_for_item(item, 0); write_reloc_t(s->s, reloc); } - static jl_value_t *jl_read_value(jl_serializer_state *s) { uintptr_t base = (uintptr_t)&s->s->buf[0]; @@ -1524,16 +1786,44 @@ static jl_value_t *jl_read_value(jl_serializer_state *s) s->s->bpos += sizeof(reloc_t); if (offset == 0) return NULL; - return (jl_value_t*)get_item_for_reloc(s, base, size, offset); + return (jl_value_t*)get_item_for_reloc(s, base, size, offset, NULL, NULL); +} + +// The next two, `jl_read_offset` and `jl_delayed_reloc`, are essentially a split version +// of `jl_read_value` that allows usage of the relocation data rather than passing NULL +// to `get_item_for_reloc`. +// This works around what would otherwise be an order-dependency conundrum: objects +// that may require relocation data have to be inserted into `serialization_order`, +// and that may include some of the adjunct data that gets serialized via +// `jl_write_value`. But we can't interpret them properly until we read the relocation +// data, and that happens after we pull items out of the serialization stream. 
+static uintptr_t jl_read_offset(jl_serializer_state *s) +{ + uintptr_t base = (uintptr_t)&s->s->buf[0]; + uintptr_t offset = *(reloc_t*)(base + (uintptr_t)s->s->bpos); + s->s->bpos += sizeof(reloc_t); + return offset; } +static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL_GC_DISABLED +{ + if (!offset) + return NULL; + uintptr_t base = (uintptr_t)&s->s->buf[0]; + size_t size = s->s->size; + int link_index = 0; + jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->link_ids_relocs, &link_index); + assert(link_index < jl_array_len(s->link_ids_relocs)); + return ret; +} -static void jl_update_all_fptrs(jl_serializer_state *s) + +static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) { - jl_sysimg_fptrs_t fvars = sysimg_fptrs; + jl_sysimg_fptrs_t fvars = image->fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later - sysimg_gvars_base = NULL; - sysimg_fptrs.base = NULL; + image->gvars_base = NULL; + image->fptrs.base = NULL; if (fvars.base == NULL) return; int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); @@ -1578,152 +1868,112 @@ static void jl_update_all_fptrs(jl_serializer_state *s) } } // Tell LLVM about the native code - jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max); + jl_register_fptrs(image->base, &fvars, linfos, sysimg_fvars_max); } +static void write_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT +{ + ios_ensureroom(s->gvar_record, globals->len * sizeof(reloc_t)); + for (size_t i = 0; i < globals->len; i++) { + void *g = globals->items[i]; + if (jl_is_binding((uintptr_t)g)) { + jl_binding_t *b = (jl_binding_t*)g; + void *reloc = ptrhash_get(&bindings, g); + if (reloc != HT_NOTFOUND) { + assert(reloc != (void*)(uintptr_t)-1); + write_reloc_t(s->gvar_record, (uintptr_t)reloc); + continue; + } + // need to deal with foreign bindings here too + assert(s->incremental); + arraylist_push(&s->uniquing_objs, 
(void*)((i << 2) | 2)); // mark as gvar && !tag + g = (void*)jl_module_globalref(b->owner, b->name); + } + uintptr_t item = backref_id(s, g, s->link_ids_gvars); + uintptr_t reloc = get_reloc_for_item(item, 0); + write_reloc_t(s->gvar_record, reloc); + record_uniquing(s, (jl_value_t*)g, ((i << 2) | 2)); // mark as gvar && !tag + } +} // Pointer relocation for native-code referenced global variables -static void jl_update_all_gvars(jl_serializer_state *s) +static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image) { - if (sysimg_gvars_base == NULL) + if (image->gvars_base == NULL) return; - size_t gvname_index = 0; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); uintptr_t base = (uintptr_t)&s->s->buf[0]; size_t size = s->s->size; reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0]; - reloc_t *end = gvars + s->gvar_record->size / sizeof(reloc_t); - while (gvars < end) { - uintptr_t offset = *gvars; - if (offset) { - uintptr_t v = get_item_for_reloc(s, base, size, offset); - *sysimg_gvars(sysimg_gvars_base, gvname_index) = v; - } - gvname_index += 1; - gvars++; + int link_index = 0; + for (i = 0; i < l; i++) { + uintptr_t offset = gvars[i]; + uintptr_t v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &link_index); + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + *gv = v; } + assert(!s->link_ids_gvars || link_index == jl_array_len(s->link_ids_gvars)); } - -// Reinitialization -static void jl_finalize_serializer(jl_serializer_state *s, arraylist_t *list) +static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image) { - size_t i, l; - - // record list of reinitialization functions - l = list->len; - for (i = 0; i < l; i += 2) { - size_t item = (size_t)list->items[i]; - size_t reloc_offset = (size_t)layout_table.items[item]; - assert(reloc_offset != 0); - write_reloc_t(s->s, reloc_offset); - write_uint8(s->s, (uintptr_t)list->items[i + 1]); + if (image->gvars_base == NULL) + 
return; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); + for (i = 0; i < l; i++) { + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + uintptr_t v = *gv; + if (!jl_is_binding(v)) + v = (uintptr_t)jl_as_global_root((jl_value_t*)v); + *gv = v; } - write_reloc_t(s->s, 0); } -static void jl_reinit_item(jl_value_t *v, uint8_t how) JL_GC_DISABLED +static void jl_compile_extern(jl_method_t *m, void *sysimg_handle) JL_GC_DISABLED { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - assert(jl_is_array(*a)); - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // rebuild the binding table for module v - jl_module_t *mod = (jl_module_t*)v; - assert(jl_is_module(mod)); - size_t nbindings = mod->bindings.size; - htable_new(&mod->bindings, nbindings); - struct binding { - jl_sym_t *asname; - uintptr_t tag; - jl_binding_t b; - } *b; - b = (struct binding*)&mod[1]; - while (nbindings > 0) { - ptrhash_put(&mod->bindings, b->asname, &b->b); - b += 1; - nbindings -= 1; - } - if (mod->usings.items != &mod->usings._space[0]) { - void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); - memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); - mod->usings.items = newitems; - } - break; - } - case 3: { // install ccallable entry point in JIT - jl_svec_t *sv = ((jl_method_t*)v)->ccallable; - int success = jl_compile_extern_c(NULL, NULL, jl_sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); - assert(success); (void)success; - break; - } - default: - assert(0 && "corrupt deserialization state"); - abort(); - } + // install ccallable entry point in JIT + jl_svec_t *sv = m->ccallable; + int success = jl_compile_extern_c(NULL, NULL, sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); + if (!success) + jl_safe_printf("WARNING: @ccallable was already defined for this method name\n"); // enjoy a very bad 
time + assert(success || !sysimg_handle); } -static void jl_finalize_deserializer(jl_serializer_state *s) JL_GC_DISABLED +static void jl_reinit_ccallable(arraylist_t *ccallable_list, char *base, void *sysimg_handle) { - // run reinitialization functions - uintptr_t base = (uintptr_t)&s->s->buf[0]; - while (1) { - size_t offset; - if (sizeof(reloc_t) <= 4) { - offset = read_uint32(s->s); - } - else { - offset = read_uint64(s->s); - } - if (offset == 0) - break; - jl_value_t *v = (jl_value_t*)(base + offset); - jl_reinit_item(v, read_uint8(s->s)); + for (size_t i = 0; i < ccallable_list->len; i++) { + uintptr_t item = (uintptr_t)ccallable_list->items[i]; + jl_method_t *m = (jl_method_t*)(base + item); + jl_compile_extern(m, sysimg_handle); } } - -// Code below helps slim down the images -static void jl_scan_type_cache_gv(jl_serializer_state *s, jl_svec_t *cache) -{ - size_t l = jl_svec_len(cache), i; - for (i = 0; i < l; i++) { - jl_value_t *ti = jl_svecref(cache, i); - if (ti == jl_nothing) - continue; - if (jl_get_llvm_gv(native_functions, ti)) { - jl_serialize_value(s, ti); - } - else if (jl_is_datatype(ti)) { - jl_value_t *singleton = ((jl_datatype_t*)ti)->instance; - if (singleton && jl_get_llvm_gv(native_functions, singleton)) - jl_serialize_value(s, ti); - } - } -} - -// remove cached types not referenced in the stream +// Code below helps slim down the images by +// removing cached types not referenced in the stream static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED { size_t l = jl_svec_len(cache), i; + if (l == 0) + return cache; for (i = 0; i < l; i++) { jl_value_t *ti = jl_svecref(cache, i); if (ti == jl_nothing) continue; - if (ptrhash_get(&backref_table, ti) == HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) == HT_NOTFOUND) jl_svecset(cache, i, jl_nothing); } - void *idx = ptrhash_get(&backref_table, cache); - ptrhash_remove(&backref_table, cache); + void *idx = ptrhash_get(&serialization_order, cache); + assert(idx != 
HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache); cache = cache_rehash_set(cache, l); - ptrhash_put(&backref_table, cache, idx); + // redirect all references to the old cache to relocate to the new cache object + ptrhash_put(&serialization_order, cache, idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache; return cache; } @@ -1734,7 +1984,7 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) jl_value_t *ti = jl_svecref(cache, i); if (ti == jl_nothing) break; - if (ptrhash_get(&backref_table, ti) != HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) != HT_NOTFOUND) jl_svecset(cache, ins++, ti); } while (ins < l) @@ -1777,11 +2027,6 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig return ret; } -static void record_field_change(jl_value_t **addr, jl_value_t *newval) -{ - ptrhash_put(&field_replace, (void*)addr, newval); -} - static void strip_specializations_(jl_method_instance_t *mi) { assert(jl_is_method_instance(mi)); @@ -1866,6 +2111,7 @@ static void jl_strip_all_codeinfos(void) // triggering non-relocatability of compressed CodeInfos. // Set the number of such roots in each method when the sysimg is // serialized. 
+// TODO: move this to `jl_write_values` static int set_nroots_sysimg__(jl_typemap_entry_t *def, void *_env) { jl_method_t *m = def->func.method; @@ -1885,9 +2131,6 @@ static void jl_set_nroots_sysimg(void) // --- entry points --- -static void jl_init_serializer2(int); -static void jl_cleanup_serializer2(void); - jl_array_t *jl_global_roots_table; static jl_mutex_t global_roots_lock; @@ -1931,33 +2174,93 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED) return val; } -static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED +static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key, + /* outputs */ jl_array_t **extext_methods, + jl_array_t **new_specializations, jl_array_t **method_roots_list, + jl_array_t **ext_targets, jl_array_t **edges) { - jl_gc_collect(JL_GC_FULL); - jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers - JL_TIMING(SYSIMG_DUMP); + // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist + // ext_targets: [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods + // ordinary dispatch: invokesig=NULL, callee is MethodInstance + // `invoke` dispatch: invokesig is signature, callee is MethodInstance + // abstract call: callee is signature + // edges: [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods + + assert(edges_map == NULL); + JL_GC_PUSH1(&edges_map); + + // Save the inferred code from newly inferred, external methods + htable_new(&external_mis, 0); // we need external_mis until after `jl_collect_edges` finishes + *new_specializations = queue_external_cis(newly_inferred); + // Collect the new method roots + htable_t methods_with_newspecs; + htable_new(&methods_with_newspecs, 0); + jl_collect_methods(&methods_with_newspecs, *new_specializations); + *method_roots_list = jl_alloc_vec_any(0); + jl_collect_new_roots(*method_roots_list, &methods_with_newspecs, worklist_key); + htable_free(&methods_with_newspecs); + + // Collect method extensions and edges data + edges_map = jl_alloc_vec_any(0); + *extext_methods = jl_alloc_vec_any(0); + size_t i, len = jl_array_len(mod_array); + for (i = 0; i < len; i++) { + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); + assert(jl_is_module(m)); + if (m->parent == m) // some toplevel modules (really just Base) aren't actually + jl_collect_extext_methods_from_mod(*extext_methods, m); + } + jl_collect_methtable_from_mod(*extext_methods, jl_type_type_mt); + jl_collect_missing_backedges(jl_type_type_mt); + jl_collect_methtable_from_mod(*extext_methods, jl_nonfunction_mt); + jl_collect_missing_backedges(jl_nonfunction_mt); + // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges. + // Process this to extract `edges` and `ext_targets`. 
+ *ext_targets = jl_alloc_vec_any(0); + *edges = jl_alloc_vec_any(0); + jl_collect_edges(*edges, *ext_targets); + htable_free(&external_mis); + assert(edges_map == NULL); // jl_collect_edges clears this when done - htable_new(&field_replace, 10000); + JL_GC_POP(); +} + +// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage +static void jl_save_system_image_to_stream(ios_t *f, + jl_array_t *worklist, jl_array_t *extext_methods, + jl_array_t *new_specializations, jl_array_t *method_roots_list, + jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED +{ + htable_new(&field_replace, 0); // strip metadata and IR when requested if (jl_options.strip_metadata || jl_options.strip_ir) jl_strip_all_codeinfos(); - jl_set_nroots_sysimg(); + if (worklist == NULL) + jl_set_nroots_sysimg(); int en = jl_gc_enable(0); - jl_init_serializer2(1); - htable_reset(&backref_table, 250000); - arraylist_new(&reinit_list, 0); - arraylist_new(&ccallable_list, 0); + nsym_tag = 0; + htable_new(&symbol_table, 0); + htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); + uintptr_t i; + for (i = 0; id_to_fptrs[i] != NULL; i++) { + ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); + } + htable_new(&serialization_order, 25000); + htable_new(&unique_ready, 0); + htable_new(&nullptrs, 0); + htable_new(&bindings, 0); arraylist_new(&object_worklist, 0); - backref_table_numel = 0; + arraylist_new(&serialization_queue, 0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; - ios_mem(&sysimg, 1000000); - ios_mem(&const_data, 100000); - ios_mem(&symbols, 100000); - ios_mem(&relocs, 100000); - ios_mem(&gvar_record, 100000); - ios_mem(&fptr_record, 100000); + ios_mem(&sysimg, 0); + ios_mem(&const_data, 0); + ios_mem(&symbols, 0); + ios_mem(&relocs, 0); + ios_mem(&gvar_record, 0); + ios_mem(&fptr_record, 0); jl_serializer_state s; + s.incremental = !(worklist == NULL); s.s = 
&sysimg; s.const_data = &const_data; s.symbols = &symbols; @@ -1967,16 +2270,31 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED s.ptls = jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); - jl_value_t **const*const tags = get_tags(); - - // empty!(Core.ARGS) - if (jl_core_module != NULL) { - jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); - if (args != NULL) { - jl_array_del_end(args, jl_array_len(args)); + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + arraylist_new(&s.fixup_objs, 0); + arraylist_new(&s.ccallable_list, 0); + s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0); + s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0); + s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0); + htable_new(&s.callers_with_edges, 0); + jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL; + + arraylist_t gvars; + arraylist_new(&gvars, 0); + if (native_functions) + jl_get_llvm_gvs(native_functions, &gvars); + + if (worklist == NULL) { + // empty!(Core.ARGS) + if (jl_core_module != NULL) { + jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); + if (args != NULL) { + jl_array_del_end(args, jl_array_len(args)); + } } } - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; @@ -1993,44 +2311,63 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED { // step 1: record values (recursively) that need to go in the image size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_serialize_value(&s, tag); + if (worklist == NULL) { + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_queue_for_serialization(&s, tag); + } + jl_queue_for_serialization(&s, jl_global_roots_table); + jl_queue_for_serialization(&s, s.ptls->root_task->tls); } - jl_serialize_value(&s, jl_global_roots_table); - jl_serialize_reachable(&s); - // step 1.1: check for values only found in the generated code - arraylist_t typenames; - arraylist_new(&typenames, 0); - for (i = 0; i < backref_table.size; i += 2) { - jl_typename_t *tn = (jl_typename_t*)backref_table.table[i]; - if (tn == HT_NOTFOUND || !jl_is_typename(tn)) - continue; - arraylist_push(&typenames, tn); + else { + // To ensure we don't have to manually update the list, go through all tags and queue any that are not otherwise + // judged to be externally-linked + htable_new(&external_objects, NUM_TAGS); + for (size_t i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + ptrhash_put(&external_objects, tag, tag); + } + // Queue the worklist itself as the first item we serialize + jl_queue_for_serialization(&s, worklist); + jl_queue_for_serialization(&s, jl_module_init_order); + // Classify the CodeInstances with respect to their need for validation + classify_callers(&s.callers_with_edges, edges); } - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - jl_scan_type_cache_gv(&s, tn->cache); - jl_scan_type_cache_gv(&s, tn->linearcache); + // step 1.1: as needed, serialize the data needed for insertion into the running system + if (extext_methods) { + assert(ext_targets); + assert(edges); + // Queue method extensions + 
jl_queue_for_serialization(&s, extext_methods); + // Queue the new specializations + jl_queue_for_serialization(&s, new_specializations); + // Queue the new roots + jl_queue_for_serialization(&s, method_roots_list); + // Queue the edges + jl_queue_for_serialization(&s, ext_targets); + jl_queue_for_serialization(&s, edges); } jl_serialize_reachable(&s); - // step 1.2: prune (garbage collect) some special weak references from + // step 1.2: now that we have marked all bindings (badly), ensure all gvars are part of the sysimage + record_gvars(&s, &gvars); + jl_serialize_reachable(&s); + // step 1.3: prune (garbage collect) some special weak references from // built-in type caches - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - tn->cache = jl_prune_type_cache_hash(tn->cache); - jl_gc_wb(tn, tn->cache); - jl_prune_type_cache_linear(tn->linearcache); + for (i = 0; i < serialization_queue.len; i++) { + jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i]; + if (jl_is_typename(tn)) { + tn->cache = jl_prune_type_cache_hash(tn->cache); + jl_gc_wb(tn, tn->cache); + jl_prune_type_cache_linear(tn->linearcache); + } } - arraylist_free(&typenames); } { // step 2: build all the sysimg sections write_padding(&sysimg, sizeof(uintptr_t)); jl_write_values(&s); + write_gvars(&s, &gvars); jl_write_relocations(&s); - jl_write_gv_syms(&s, jl_get_root_symbol()); - jl_write_gv_tagrefs(&s); } if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { @@ -2051,8 +2388,10 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED ); jl_exit(1); } + htable_free(&s.callers_with_edges); // step 3: combine all of the sections into one file + assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0); write_uint(f, sysimg.size - sizeof(uintptr_t)); ios_seek(&sysimg, sizeof(uintptr_t)); ios_copyall(f, &sysimg); @@ -2090,56 +2429,181 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED ios_close(&fptr_record); { // step 
4: record locations of special roots - s.s = f; write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_write_value(&s, tag); + s.s = f; + if (worklist == NULL) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_write_value(&s, tag); + } + jl_write_value(&s, jl_global_roots_table); + jl_write_value(&s, s.ptls->root_task->tls); + write_uint32(f, jl_get_gs_ctr()); + write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); + write_uint(f, jl_typeinf_world); } - jl_write_value(&s, jl_global_roots_table); - jl_write_value(&s, s.ptls->root_task->tls); - write_uint32(f, jl_get_gs_ctr()); - write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); - write_uint(f, jl_typeinf_world); - jl_finalize_serializer(&s, &reinit_list); - jl_finalize_serializer(&s, &ccallable_list); - } + else { + jl_write_value(&s, worklist); + // save module initialization order + if (jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + // verify that all these modules were saved + assert(ptrhash_get(&serialization_order, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); + } + } + jl_write_value(&s, jl_module_init_order); + jl_write_value(&s, extext_methods); + jl_write_value(&s, new_specializations); + jl_write_value(&s, method_roots_list); + jl_write_value(&s, ext_targets); + jl_write_value(&s, edges); + } + write_uint32(f, jl_array_len(s.link_ids_gctags)); + ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t)); + write_uint32(f, jl_array_len(s.link_ids_relocs)); + ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs)*sizeof(uint64_t)); + write_uint32(f, jl_array_len(s.link_ids_gvars)); + ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t)); + jl_write_arraylist(s.s, 
&s.ccallable_list); + } + // Write the build_id key + uint64_t buildid = 0; + if (worklist) + buildid = jl_worklist_key(worklist); + write_uint32(f, buildid >> 32); + write_uint32(f, buildid & (((uint64_t)1 << 32) - 1)); assert(object_worklist.len == 0); arraylist_free(&object_worklist); + arraylist_free(&serialization_queue); arraylist_free(&layout_table); - arraylist_free(&reinit_list); - arraylist_free(&ccallable_list); + arraylist_free(&s.ccallable_list); arraylist_free(&s.relocs_list); arraylist_free(&s.gctags_list); + arraylist_free(&gvars); htable_free(&field_replace); - jl_cleanup_serializer2(); + if (worklist) + htable_free(&external_objects); + htable_free(&serialization_order); + htable_free(&unique_ready); + htable_free(&nullptrs); + htable_free(&bindings); + htable_free(&symbol_table); + htable_free(&fptr_to_id); + nsym_tag = 0; jl_gc_enable(en); } -JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data) +static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_array_t **mod_array, jl_array_t **udeps, int64_t *srctextpos, int64_t *checksumpos) { + *mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) + assert(jl_precompile_toplevel_module == NULL); + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + + write_header(f); + // last word of the header is the checksumpos + *checksumpos = ios_pos(f) - sizeof(uint64_t); + // write description of contents (name, uuid, buildid) + write_worklist_for_header(f, worklist); + // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist + // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. + // Also write Preferences. 
+ // last word of the dependency list is the end of the data / start of the srctextpos + *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) + // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) + // this can return errors during deserialize, + // best to keep it early (before any actual initialization) + write_mod_list(f, *mod_array); +} + + +JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data, jl_array_t *worklist) +{ + jl_gc_collect(JL_GC_FULL); + jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers + JL_TIMING(SYSIMG_DUMP); + + jl_task_t *ct = jl_current_task; ios_t *f = (ios_t*)malloc_s(sizeof(ios_t)); ios_mem(f, 0); + jl_array_t *mod_array = NULL, *udeps = NULL, *extext_methods = NULL, *new_specializations = NULL; + jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + int64_t srctextpos = 0; + int64_t checksumpos = 0; + int64_t datastartpos = 0; + if (worklist) { + jl_write_header_for_incremental(f, worklist, &mod_array, &udeps, &srctextpos, &checksumpos); + jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point + jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); + datastartpos = ios_pos(f); + } native_functions = _native_data; - jl_save_system_image_to_stream(f); + jl_save_system_image_to_stream(f, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); + native_functions = NULL; + if (worklist) { + jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point + 
// Go back and update the checksum in the header + int64_t dataendpos = ios_pos(f); + uint32_t checksum = jl_crc32c(0, &f->buf[datastartpos], dataendpos - datastartpos); + ios_seek(f, checksumpos); + write_uint64(f, checksum | ((uint64_t)0xfafbfcfd << 32)); + ios_seek(f, srctextpos); + write_uint64(f, dataendpos); + // Write the source-text for the dependent files + // Go back and update the source-text position to point to the current position + if (udeps) { + ios_seek_end(f); + // Each source-text file is written as + // int32: length of abspath + // char*: abspath + // uint64: length of src text + // char*: src text + // At the end we write int32(0) as a terminal sentinel. + size_t len = jl_array_len(udeps); + ios_t srctext; + for (size_t i = 0; i < len; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *depmod = jl_fieldref(deptuple, 0); // module + // Dependencies declared with `include_dependency` are excluded + // because these may not be Julia code (and could be huge) + if (depmod != (jl_value_t*)jl_main_module) { + jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath + const char *depstr = jl_string_data(dep); + if (!depstr[0]) + continue; + ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + jl_string_data(dep)); + continue; + } + size_t slen = jl_string_len(dep); + write_int32(f, slen); + ios_write(f, depstr, slen); + int64_t posfile = ios_pos(f); + write_uint64(f, 0); // placeholder for length of this file in bytes + uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); + ios_close(&srctext); + ios_seek(f, posfile); + write_uint64(f, filelen); + ios_seek_end(f); + } + } + } + write_int32(f, 0); // mark the end of the source text + jl_precompile_toplevel_module = NULL; + } + + JL_GC_POP(); return f; } JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src); -JL_DLLEXPORT void jl_save_system_image(const char *fname) -{ - 
ios_t f; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_errorf("cannot open system image file \"%s\" for writing", fname); - } - JL_SIGATOMIC_BEGIN(); - jl_save_system_image_to_stream(&f); - ios_close(&f); - JL_SIGATOMIC_END(); -} // Takes in a path of the form "usr/lib/julia/sys.so" (jl_restore_system_image should be passed the same string) JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname) @@ -2165,16 +2629,31 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle) if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; jl_sysimg_handle = handle; - sysimg_fptrs = jl_init_processor_sysimg(handle); + sysimage.fptrs = jl_init_processor_sysimg(handle); } -static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED +#ifndef JL_NDEBUG +// skip the performance optimizations of jl_types_equal and just use subtyping directly +// one of these types is invalid - that's why we're doing the recache type operation +// static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) +// { +// return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); +// } +#endif + +static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum, + /* outputs */ jl_array_t **restored, jl_array_t **init_order, + jl_array_t **extext_methods, + jl_array_t **new_specializations, jl_array_t **method_roots_list, + jl_array_t **ext_targets, jl_array_t **edges, + char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED { JL_TIMING(SYSIMG_LOAD); int en = jl_gc_enable(0); - jl_init_serializer2(0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; jl_serializer_state s; + s.incremental = restored != NULL; // jl_linkage_blobs.len > 0; + s.image = image; s.s = NULL; s.const_data = &const_data; s.symbols = &symbols; @@ -2184,7 +2663,11 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED s.ptls = 
jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); + s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = NULL; jl_value_t **const*const tags = get_tags(); + htable_t new_dt_objs; + htable_new(&new_dt_objs, 0); + arraylist_new(&deser_sym, 0); // step 1: read section map assert(ios_pos(f) == 0 && f->bm == bm_mem); @@ -2222,27 +2705,67 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED ios_skip(f, sizeof_fptr_record); // step 2: get references to special values - s.s = f; ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t **tag = tags[i]; - *tag = jl_read_value(&s); - } - jl_global_roots_table = (jl_array_t*)jl_read_value(&s); - // set typeof extra-special values now that we have the type set by tags above - jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; - jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; - s.ptls->root_task->tls = jl_read_value(&s); - jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); - jl_init_int32_int64_cache(); - jl_init_box_caches(); - - uint32_t gs_ctr = read_uint32(f); - jl_atomic_store_release(&jl_world_counter, read_uint(f)); - jl_typeinf_world = read_uint(f); - jl_set_gs_ctr(gs_ctr); + s.s = f; + uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0; + uintptr_t offset_ext_targets = 0, offset_edges = 0; + if (!s.incremental) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t **tag = tags[i]; + *tag = jl_read_value(&s); + } + jl_global_roots_table = (jl_array_t*)jl_read_value(&s); + // set typeof extra-special values now that we have the type set by tags above + jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; + 
jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; + s.ptls->root_task->tls = jl_read_value(&s); + jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); + jl_init_int32_int64_cache(); + jl_init_box_caches(); + + uint32_t gs_ctr = read_uint32(f); + jl_atomic_store_release(&jl_world_counter, read_uint(f)); + jl_typeinf_world = read_uint(f); + jl_set_gs_ctr(gs_ctr); + } + else { + jl_atomic_fetch_add(&jl_world_counter, 1); + offset_restored = jl_read_offset(&s); + offset_init_order = jl_read_offset(&s); + offset_extext_methods = jl_read_offset(&s); + offset_new_specializations = jl_read_offset(&s); + offset_method_roots_list = jl_read_offset(&s); + offset_ext_targets = jl_read_offset(&s); + offset_edges = jl_read_offset(&s); + } + size_t nlinks_gctags = read_uint32(f); + if (nlinks_gctags > 0) { + s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags); + ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint64_t)); + } + size_t nlinks_relocs = read_uint32(f); + if (nlinks_relocs > 0) { + s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, nlinks_relocs); + ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint64_t)); + } + size_t nlinks_gvars = read_uint32(f); + if (nlinks_gvars > 0) { + s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gvars); + ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint64_t)); + } + jl_read_arraylist(s.s, ccallable_list ? 
ccallable_list : &s.ccallable_list); + if (s.incremental) { + assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges); + *restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored); + *init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order); + *extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods); + *new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations); + *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list); + *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets); + *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges); + } s.s = NULL; // step 3: apply relocations @@ -2250,26 +2773,333 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED jl_read_symbols(&s); ios_close(&symbols); - sysimg_base = &sysimg.buf[0]; - sysimg_relocs = &relocs.buf[0]; - jl_gc_set_permalloc_region((void*)sysimg_base, (void*)(sysimg_base + sysimg.size)); + char *image_base = (char*)&sysimg.buf[0]; + reloc_t *relocs_base = (reloc_t*)&relocs.buf[0]; + if (base) + *base = image_base; s.s = &sysimg; - jl_read_reloclist(&s, GC_OLD); // gctags + jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags size_t sizeof_tags = ios_pos(&relocs); (void)sizeof_tags; - jl_read_reloclist(&s, 0); // general relocs + jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs + // s.link_ids_gvars will be processed in `jl_update_all_gvars` + jl_update_all_gvars(&s, image); // gvars relocs + if (s.incremental) { + jl_read_arraylist(s.relocs, &s.uniquing_types); + jl_read_arraylist(s.relocs, &s.uniquing_objs); + jl_read_arraylist(s.relocs, &s.fixup_types); + } + else { + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + } + jl_read_arraylist(s.relocs, &s.fixup_objs); + // Perform the uniquing of objects that we don't "own" and consequently can't 
promise + // weren't created by some other package before this one got loaded: + // - iterate through all objects that need to be uniqued. The first encounter has to be the + // "reconstructable blob". We either look up the object (if something has created it previously) + // or construct it for the first time, crucially outside the pointer range of any pkgimage. + // This ensures it stays unique-worthy. + // - after we've stored the address of the "real" object (which for convenience we do among the data + // written to allow lookup/reconstruction), then we have to update references to that "reconstructable blob": + // instead of performing the relocation within the package image, we instead (re)direct all references + // to the external object. + arraylist_t cleanup_list; + arraylist_new(&cleanup_list, 0); + arraylist_t delay_list; + arraylist_new(&delay_list, 0); + for (size_t i = 0; i < s.uniquing_types.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_types.items[i]; + // check whether we are operating on the typetag + // (needing to ignore GC bits) or a regular field + int tag = (item & 1) == 1; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + assert(tag == 0); + } + else { + pfld = (uintptr_t*)(image_base + item); + if (tag) + obj = (jl_value_t**)jl_typeof(jl_valueof(pfld)); + else + obj = *(jl_value_t***)pfld; + if ((char*)obj > (char*)pfld) { + assert(tag == 0); + arraylist_push(&delay_list, pfld); + arraylist_push(&delay_list, obj); + ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid + *pfld = (uintptr_t)NULL; + continue; + } + } + jl_value_t *otyp = jl_typeof(obj); // the original type of the object that was written 
here + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t)); + if (otyp == (jl_value_t*)jl_datatype_type) { + jl_datatype_t *dt = (jl_datatype_t*)obj[0], *newdt; + if (jl_is_datatype(dt)) { + newdt = dt; // already done + } + else { + dt = (jl_datatype_t*)obj; + arraylist_push(&cleanup_list, (void*)obj); + ptrhash_remove(&new_dt_objs, (void*)obj); // unmark obj as invalid before must_be_new_dt + if (must_be_new_dt((jl_value_t*)dt, &new_dt_objs, image_base, sizeof_sysimg)) + newdt = NULL; + else + newdt = jl_lookup_cache_type_(dt); + if (newdt == NULL) { + // make a non-owned copy of obj so we don't accidentally + // assume this is the unique copy later + newdt = jl_new_uninitialized_datatype(); + jl_astaggedvalue(newdt)->bits.gc = GC_OLD; + // leave most fields undefined for now, but we may need instance later, + // and we overwrite the name field (field 0) now so preserve it too + if (dt->instance) { + assert(dt->instance == jl_nothing); + newdt->instance = dt->instance = jl_gc_permobj(0, newdt); + } + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + newdt->name = dt->name; + ptrhash_put(&new_dt_objs, (void*)newdt, dt); + } + else { + assert(newdt->hash == dt->hash); + } + obj[0] = (jl_value_t*)newdt; + } + newobj = (jl_value_t*)newdt; + } + else { + assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_is_datatype_singleton((jl_datatype_t*)otyp) && "unreachable"); + newobj = ((jl_datatype_t*)otyp)->instance; + assert(newobj != jl_nothing); + arraylist_push(&cleanup_list, (void*)obj); + } + if (tag) + *pfld = (uintptr_t)newobj | GC_OLD; + else + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_typeis(obj, otyp)); + } + // A few fields (reached via super) might be self-recursive. This is rare, but handle them now. 
+ // They cannot be instances though, since the type must fully exist before the singleton field can be allocated + for (size_t i = 0; i < delay_list.len; ) { + uintptr_t *pfld = (uintptr_t*)delay_list.items[i++]; + jl_value_t **obj = (jl_value_t **)delay_list.items[i++]; + assert(jl_is_datatype(obj)); + jl_datatype_t *dt = (jl_datatype_t*)obj[0]; + assert(jl_is_datatype(dt)); + jl_value_t *newobj = (jl_value_t*)dt; + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + } + arraylist_free(&delay_list); + // now that all the fields of dt are assigned and unique, copy them into + // their final newdt memory location: this ensures we do not accidentally + // think this pkg image has the singular unique copy of it + void **table = new_dt_objs.table; + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_typename_t *name = newdt->name; + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + assert(*(void**)dt == (void*)newdt); + *newdt = *(jl_datatype_t*)dt; // copy the datatype fields (except field 1, which we corrupt above) + newdt->name = name; + } + } + // we should never see these pointers again, so scramble their memory, so any attempt to look at them crashes + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); // n.b. 
might be 0xbabababa already + if (t == (jl_value_t*)jl_datatype_type) + memset(o, 0xba, sizeof(jl_value_t*) + sizeof(jl_datatype_t)); + else + memset(o, 0xba, sizeof(jl_value_t*) + 0); // singleton + } + arraylist_grow(&cleanup_list, -cleanup_list.len); + // finally cache all our new types now + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_cache_type_(newdt); + } + } + for (size_t i = 0; i < s.fixup_types.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_types.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + assert(jl_is_datatype(obj)); + jl_cache_type_((jl_datatype_t*)obj); + } + // Perform fixups: things like updating world ages, inserting methods & specializations, etc. + size_t world = jl_atomic_load_acquire(&jl_world_counter); + for (size_t i = 0; i < s.uniquing_objs.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_objs.items[i]; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + } + else { + pfld = (uintptr_t*)(image_base + item); + obj = *(jl_value_t***)pfld; + } + jl_value_t *otyp = jl_typeof(obj); // the original type of the object that was written here + if (otyp == (jl_value_t*)jl_method_instance_type) { + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t)); + jl_value_t *m = obj[0]; + if (jl_is_method_instance(m)) { + newobj = m; // already done + } + else { + arraylist_push(&cleanup_list, (void*)obj); + jl_value_t *specTypes = obj[1]; + jl_value_t *sparams = obj[2]; + newobj = (jl_value_t*)jl_specializations_get_linfo((jl_method_t*)m, specTypes, 
(jl_svec_t*)sparams); + obj[0] = newobj; + } + } + else if (otyp == (jl_value_t*)jl_globalref_type) { + // this actually needs a binding_t object at that gvar slot if we encountered it in the uniquing_objs + jl_globalref_t *g = (jl_globalref_t*)obj; + jl_binding_t *b = jl_get_binding_if_bound(g->mod, g->name); + assert(b); // XXX: actually this is probably quite buggy, since julia's handling of global resolution is rather bad + newobj = (jl_value_t*)b; + } + else { + abort(); // should be unreachable + } + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_typeis(obj, otyp)); + } + arraylist_free(&s.uniquing_types); + arraylist_free(&s.uniquing_objs); + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); + if (t == (jl_value_t*)jl_method_instance_type) + memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored + } + arraylist_free(&cleanup_list); + for (size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_typeis(obj, jl_typemap_entry_type)) { + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj; + entry->min_world = world; + } + else if (jl_is_method(obj)) { + jl_method_t *m = (jl_method_t*)obj; + m->primary_world = world; + } + else if (jl_is_method_instance(obj)) { + jl_method_instance_t *newobj = jl_specializations_get_or_insert((jl_method_instance_t*)obj); + assert(newobj == (jl_method_instance_t*)obj); // strict insertion expected + (void)newobj; + } + else if (jl_is_code_instance(obj)) { + jl_code_instance_t *ci = (jl_code_instance_t*)obj; + assert(s.incremental); + ci->min_world = world; + if (ci->max_world == 1) { // sentinel value: has edges to external callables + ptrhash_put(&new_code_instance_validate, ci, 
(void*)(~(uintptr_t)HT_NOTFOUND)); // "HT_FOUND" + } + else if (ci->max_world) { + // It's valid, but it may not be connected + if (!ci->def->cache) + ci->def->cache = ci; + } + else { + // Ensure this code instance is not connected + if (ci->def->cache == ci) + ci->def->cache = NULL; + } + } + else if (jl_is_globalref(obj)) { + continue; // wait until all the module binding tables have been initialized + } + else if (jl_is_module(obj)) { + // rebuild the binding table for module v + // TODO: maybe want to delay this more, but that only strongly matters for async / thread safety + // and we are already bad at that + jl_module_t *mod = (jl_module_t*)obj; + mod->build_id.hi = checksum; + size_t nbindings = mod->bindings.size; + htable_new(&mod->bindings, nbindings); + struct binding { + jl_sym_t *asname; + uintptr_t tag; + jl_binding_t b; + } *b; + b = (struct binding*)&mod[1]; + while (nbindings > 0) { + ptrhash_put(&mod->bindings, b->asname, &b->b); + b += 1; + nbindings -= 1; + } + if (mod->usings.items != &mod->usings._space[0]) { + void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); + memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); + mod->usings.items = newitems; + } + } + else { + // rehash IdDict + //assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename); + jl_array_t **a = (jl_array_t**)obj; + assert(jl_typeis(*a, jl_array_any_type)); + *a = jl_idtable_rehash(*a, jl_array_len(*a)); + jl_gc_wb(obj, *a); + } + } + // Now pick up the globalref binding pointer field, when we can + for (size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_is_globalref(obj)) { + jl_globalref_t *r = (jl_globalref_t*)obj; + jl_binding_t *b = jl_get_binding_if_bound(r->mod, r->name); + r->bnd_cache = b && b->value ? 
b : NULL; + } + } + arraylist_free(&s.fixup_types); + arraylist_free(&s.fixup_objs); + + if (s.incremental) + jl_root_new_gvars(&s, image); ios_close(&relocs); ios_close(&const_data); - jl_update_all_gvars(&s); // gvars relocs ios_close(&gvar_record); - s.s = NULL; - jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt; + htable_free(&new_dt_objs); - s.s = f; - // reinit items except ccallables - jl_finalize_deserializer(&s); s.s = NULL; if (0) { @@ -2289,21 +3119,166 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED (unsigned)sizeof_gvar_record, (unsigned)sizeof_fptr_record); } + if (cachesizes) { + cachesizes->sysdata = sizeof_sysimg; + cachesizes->isbitsdata = sizeof_constdata; + cachesizes->symboldata = sizeof_symbols; + cachesizes->tagslist = sizeof_tags; + cachesizes->reloclist = sizeof_relocations - sizeof_tags; + cachesizes->gvarlist = sizeof_gvar_record; + cachesizes->fptrlist = sizeof_fptr_record; + } + if (!s.incremental) + jl_init_codegen(); s.s = &sysimg; - jl_init_codegen(); - jl_update_all_fptrs(&s); // fptr relocs and registration - // reinit ccallables, which require codegen to be initialized - s.s = f; - jl_finalize_deserializer(&s); + jl_update_all_fptrs(&s, image); // fptr relocs and registration + if (!ccallable_list) { + // TODO: jl_sysimg_handle or img_handle? 
+ jl_reinit_ccallable(&s.ccallable_list, image_base, jl_sysimg_handle); + arraylist_free(&s.ccallable_list); + } + s.s = NULL; ios_close(&fptr_record); ios_close(&sysimg); - s.s = NULL; - jl_gc_reset_alloc_count(); + if (!s.incremental) + jl_gc_reset_alloc_count(); + arraylist_free(&deser_sym); + + // Prepare for later external linkage against the sysimg + // Also sets up images for protection against garbage collection + arraylist_push(&jl_linkage_blobs, (void*)image_base); + arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t))); + arraylist_push(&jl_image_relocs, (void*)relocs_base); + + // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1); + uint64_t buildid = (((uint64_t)read_uint32(f)) << 32) | read_uint32(f); + if (!jl_build_ids) + jl_build_ids = jl_alloc_array_1d(jl_array_uint64_type, 0); + jl_array_grow_end(jl_build_ids, 1); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + build_id_data[jl_array_len(jl_build_ids)-1] = buildid; jl_gc_enable(en); - jl_cleanup_serializer2(); +} + +static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos) +{ + if (ios_eof(f) || 0 == (*checksum = jl_read_verify_header(f)) || (*checksum >> 32 != 0xfafbfcfd)) { + return jl_get_exceptionf(jl_errorexception_type, + "Precompile file header verification checks failed."); + } + { // skip past the mod list + size_t len; + while ((len = read_int32(f))) + ios_skip(f, len + 3 * sizeof(uint64_t)); + } + { // skip past the dependency list + size_t deplen = read_uint64(f); + ios_skip(f, deplen - sizeof(uint64_t)); + *dataendpos = read_uint64(f); + } + + // verify that the system state is valid + return read_verify_mod_list(f, depmods); +} + +// TODO?: refactor to make it easier to create the "package inspector" +static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int complete) +{ + uint64_t checksum 
= 0; + int64_t dataendpos = 0; + jl_value_t *verify_fail = jl_validate_cache_file(f, depmods, &checksum, &dataendpos); + if (verify_fail) + return verify_fail; + + jl_value_t *restored = NULL; + jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + jl_svec_t *cachesizes_sv = NULL; + char *base; + arraylist_t ccallable_list; + JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv); + + { // make a permanent in-memory copy of f (excluding the header) + ios_bufmode(f, bm_none); + JL_SIGATOMIC_BEGIN(); + size_t len_begin = LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT); + assert(len_begin > 0 && len_begin < dataendpos); + size_t len = dataendpos - len_begin; + char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + ios_seek(f, len_begin); + if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { + restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); + JL_SIGATOMIC_END(); + } + else { + ios_close(f); + ios_static_buffer(f, sysimg, len); + htable_new(&new_code_instance_validate, 0); + pkgcachesizes cachesizes; + jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); + JL_SIGATOMIC_END(); + + // Insert method extensions + jl_insert_methods(extext_methods); + // No special processing of `new_specializations` is required because recaching handled it + // Add roots to methods + jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored)); + // Handle edges + jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations); // restore external backedges (needs to be last) + // check new CodeInstances and validate any that 
lack external backedges + validate_new_code_instances(); + // reinit ccallables + jl_reinit_ccallable(&ccallable_list, base, NULL); + arraylist_free(&ccallable_list); + htable_free(&new_code_instance_validate); + if (complete) { + cachesizes_sv = jl_alloc_svec_uninit(7); + jl_svec_data(cachesizes_sv)[0] = jl_box_long(cachesizes.sysdata); + jl_svec_data(cachesizes_sv)[1] = jl_box_long(cachesizes.isbitsdata); + jl_svec_data(cachesizes_sv)[2] = jl_box_long(cachesizes.symboldata); + jl_svec_data(cachesizes_sv)[3] = jl_box_long(cachesizes.tagslist); + jl_svec_data(cachesizes_sv)[4] = jl_box_long(cachesizes.reloclist); + jl_svec_data(cachesizes_sv)[5] = jl_box_long(cachesizes.gvarlist); + jl_svec_data(cachesizes_sv)[6] = jl_box_long(cachesizes.fptrlist); + restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list, + ext_targets, edges, cachesizes_sv); + } else + restored = (jl_value_t*)jl_svec(2, restored, init_order); + } + } + + JL_GC_POP(); + return restored; +} + +static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image) +{ + uint64_t checksum = 0; // TODO: make this real + jl_restore_system_image_from_stream_(f, image, NULL, checksum, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int complete) +{ + ios_t f; + ios_static_buffer(&f, (char*)buf, sz); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, complete); + ios_close(&f); + return ret; +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete) +{ + ios_t f; + if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { + return jl_get_exceptionf(jl_errorexception_type, + "Cache file \"%s\" not found.\n", fname); + } + jl_image_t pkgimage = {}; + jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, complete); + 
ios_close(&f); + return ret; } // TODO: need to enforce that the alignment of the buffer is suitable for vectors @@ -2333,7 +3308,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) jl_errorf("Error reading system image file."); ios_close(&f); ios_static_buffer(&f, sysimg, len); - jl_restore_system_image_from_stream(&f); + jl_restore_system_image_from_stream(&f, &sysimage); ios_close(&f); JL_SIGATOMIC_END(); } @@ -2344,38 +3319,52 @@ JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len) ios_t f; JL_SIGATOMIC_BEGIN(); ios_static_buffer(&f, (char*)buf, len); - jl_restore_system_image_from_stream(&f); + jl_restore_system_image_from_stream(&f, &sysimage); ios_close(&f); JL_SIGATOMIC_END(); } -// --- init --- - -static void jl_init_serializer2(int for_serialize) +JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods) { - if (for_serialize) { - htable_new(&symbol_table, 0); - htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); - htable_new(&backref_table, 0); - htable_new(&layout_cache, 0); - uintptr_t i; - for (i = 0; id_to_fptrs[i] != NULL; i++) { - ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); - } + void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY); + if (!pkgimg_handle) { +#ifdef _OS_WINDOWS_ + int err; + char reason[256]; + err = GetLastError(); + win32_formatmessage(err, reason, sizeof(reason)); +#else + const char *reason = dlerror(); +#endif + jl_errorf("Error opening package file %s: %s\n", fname, reason); } - else { - arraylist_new(&deser_sym, 0); + const char *pkgimg_data; + jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); + size_t *plen; + jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + + jl_image_t pkgimage; + pkgimage.fptrs = jl_init_processor_pkgimg(pkgimg_handle); + if (!jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_base", (void **)&pkgimage.gvars_base, 0)) { + pkgimage.gvars_base = 
NULL; } - nsym_tag = 0; -} + jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_offsets", (void **)&pkgimage.gvars_offsets, 1); + pkgimage.gvars_offsets += 1; + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, 0); -static void jl_cleanup_serializer2(void) -{ - htable_reset(&symbol_table, 0); - htable_reset(&fptr_to_id, 0); - htable_reset(&backref_table, 0); - htable_reset(&layout_cache, 0); - arraylist_free(&deser_sym); + void *pgcstack_func_slot; + jl_dlsym(pkgimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 0); + if (pgcstack_func_slot) { // Empty package images might miss these + void *pgcstack_key_slot; + jl_dlsym(pkgimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1); + jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); + + size_t *tls_offset_idx; + jl_dlsym(pkgimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); + *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); + } + + return mod; } #ifdef __cplusplus diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c new file mode 100644 index 00000000000000..60b1a0c14eff61 --- /dev/null +++ b/src/staticdata_utils.c @@ -0,0 +1,1264 @@ +static htable_t new_code_instance_validate; +static htable_t external_mis; + +// inverse of backedges graph (caller=>callees hash) +jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this + +static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT +{ + write_uint64(s, *((uint64_t*)&x)); +} + +// Decide if `t` must be new, because it points to something new. 
+// If it is new, the object (in particular, the super field) might not be entirely +// valid for the cache, so we want to finish transforming it before attempting +// to look in the cache for it +int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeof_sysimg) +{ + //if (jl_object_in_image(t)) + // return 0; // fast-path for rejection + assert(ptrhash_get(news, (void*)t) != (void*)t); + if (ptrhash_has(news, (void*)t) || ptrhash_has(news, (void*)jl_typeof(t))) + return 1; + if (!(image_base < (char*)t && (char*)t <= image_base + sizeof_sysimg)) + return 0; // fast-path for rejection + if (jl_is_uniontype(t)) { + jl_uniontype_t *u = (jl_uniontype_t*)t; + return must_be_new_dt(u->a, news, image_base, sizeof_sysimg) || + must_be_new_dt(u->b, news, image_base, sizeof_sysimg); + } + else if (jl_is_unionall(t)) { + jl_unionall_t *ua = (jl_unionall_t*)t; + return must_be_new_dt((jl_value_t*)ua->var, news, image_base, sizeof_sysimg) || + must_be_new_dt(ua->body, news, image_base, sizeof_sysimg); + } + else if (jl_is_typevar(t)) { + jl_tvar_t *tv = (jl_tvar_t*)t; + return must_be_new_dt(tv->lb, news, image_base, sizeof_sysimg) || + must_be_new_dt(tv->ub, news, image_base, sizeof_sysimg); + } + else if (jl_is_vararg(t)) { + jl_vararg_t *tv = (jl_vararg_t*)t; + if (tv->T && must_be_new_dt(tv->T, news, image_base, sizeof_sysimg)) + return 1; + if (tv->N && must_be_new_dt(tv->N, news, image_base, sizeof_sysimg)) + return 1; + } + else if (jl_is_datatype(t)) { + jl_datatype_t *dt = (jl_datatype_t*)t; + assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?"); + jl_datatype_t *super = dt->super; + // check if super is news, since then we must be new also + // (it is also possible that super is indeterminate now, wait for `t` + // to be resolved, then will be determined later and fixed up by the + // delay_list, for this and any other references to it). 
+ while (super != jl_any_type) { + assert(super); + if (ptrhash_has(news, (void*)super)) + return 1; + if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg)) + break; // fast-path for rejection of super + // otherwise super might be something that was not cached even though a later supertype might be + // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}` + super = super->super; + } + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) + if (must_be_new_dt(jl_tparam(dt, i), news, image_base, sizeof_sysimg)) + return 1; + } + else { + return must_be_new_dt(jl_typeof(t), news, image_base, sizeof_sysimg); + } + return 0; +} + +static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT +{ + assert(jl_is_array(worklist)); + size_t len = jl_array_len(worklist); + if (len > 0) { + jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); + assert(jl_is_module(topmod)); + return topmod->build_id.lo; + } + return 0; +} + +static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; +JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) +{ + assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); + newly_inferred = (jl_array_t*) _newly_inferred; +} + +static int method_instance_in_queue(jl_method_instance_t *mi) +{ + return ptrhash_get(&external_mis, mi) != HT_NOTFOUND; +} + +// compute whether a type references something internal to worklist +// and thus could not have existed before deserialize +// and thus does not need delayed unique-ing +static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_object_in_image(v)) + return 0; // fast-path for rejection + if (jl_is_uniontype(v)) { + jl_uniontype_t *u = (jl_uniontype_t*)v; + return type_in_worklist(u->a) || + type_in_worklist(u->b); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return 
type_in_worklist((jl_value_t*)ua->var) || + type_in_worklist(ua->body); + } + else if (jl_is_typevar(v)) { + jl_tvar_t *tv = (jl_tvar_t*)v; + return type_in_worklist(tv->lb) || + type_in_worklist(tv->ub); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *tv = (jl_vararg_t*)v; + if (tv->T && type_in_worklist(tv->T)) + return 1; + if (tv->N && type_in_worklist(tv->N)) + return 1; + } + else if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (!jl_object_in_image((jl_value_t*)dt->name)) + return 1; + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) + if (type_in_worklist(jl_tparam(dt, i))) + return 1; + } + else { + return type_in_worklist(jl_typeof(v)); + } + return 0; +} + +static void mark_backedges_in_worklist(jl_method_instance_t *mi, htable_t *visited, int found) +{ + int oldfound = (char*)ptrhash_get(visited, mi) - (char*)HT_NOTFOUND; + if (oldfound < 3) + return; // not in-progress + ptrhash_put(visited, mi, (void*)((char*)HT_NOTFOUND + 1 + found)); +#ifndef NDEBUG + jl_module_t *mod = mi->def.module; + if (jl_is_method(mod)) + mod = ((jl_method_t*)mod)->module; + assert(jl_is_module(mod)); + assert(!mi->precompiled && jl_object_in_image((jl_value_t*)mod)); + assert(mi->backedges); +#endif + size_t i = 0, n = jl_array_len(mi->backedges); + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + mark_backedges_in_worklist(be, visited, found); + } +} + +// When we infer external method instances, ensure they link back to the +// package. 
Otherwise they might be, e.g., for external macros +// Returns 1 when `mi` links back, 0 when it does not, and a value >= 2 while the +// answer depends on an entry that is still marked in-progress in `visited`. +static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, int depth) +{ + jl_module_t *mod = mi->def.module; + if (jl_is_method(mod)) + mod = ((jl_method_t*)mod)->module; + assert(jl_is_module(mod)); + if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod)) { + return 1; + } + if (!mi->backedges) { + return 0; + } + void **bp = ptrhash_bp(visited, mi); + // HT_NOTFOUND: not yet analyzed + // HT_NOTFOUND + 1: no link back + // HT_NOTFOUND + 2: does link back + // HT_NOTFOUND + 3 + depth: in-progress + int found = (char*)*bp - (char*)HT_NOTFOUND; + if (found) + return found - 1; + *bp = (void*)((char*)HT_NOTFOUND + 3 + depth); // preliminarily mark as in-progress + size_t i = 0, n = jl_array_len(mi->backedges); + int cycle = 0; + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + int child_found = has_backedge_to_worklist(be, visited, depth + 1); + if (child_found == 1) { + found = 1; + break; + } + else if (child_found >= 2 && child_found - 2 < cycle) { + // NOTE(review): `cycle` starts at 0 and `child_found - 2` is never negative in + // this branch, so the condition above looks unsatisfiable and `cycle` would + // stay 0, leaving every `cycle`-dependent path below unreachable -- confirm + // the intended comparison before relying on the cycle handling. + // record the cycle will resolve at depth "cycle" + cycle = child_found - 2; + assert(cycle); + } + } + if (!found && cycle && cycle != depth) + return cycle + 2; + bp = ptrhash_bp(visited, mi); // re-acquire since rehashing might change the location + *bp = (void*)((char*)HT_NOTFOUND + 1 + found); + if (cycle) { + // If we are the top of the current cycle, now mark all other parts of + // our cycle by re-walking the backedges graph and marking all WIP + // items as found. + // Be careful to only re-walk as far as we had originally scanned above. + // Or if we found a backedge, also mark all of the other parts of the + // cycle as also having a backedge.
+ n = i; + i = 0; + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + mark_backedges_in_worklist(be, visited, found); + } + } + return found; +} + +// given the list of CodeInstances that were inferred during the +// build, select those that are (1) external, and (2) are inferred to be called +// from the worklist or explicitly added by a `precompile` statement. +// Also prepares for method_instance_in_queue queries. +static jl_array_t *queue_external_cis(jl_array_t *list) +{ + if (list == NULL) + return NULL; + size_t i; + htable_t visited; + assert(jl_is_array(list)); + size_t n0 = jl_array_len(list); + htable_new(&visited, n0); + jl_array_t *new_specializations = jl_alloc_vec_any(0); + JL_GC_PUSH1(&new_specializations); + for (i = 0; i < n0; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i); + assert(jl_is_code_instance(ci)); + jl_method_instance_t *mi = ci->def; + jl_method_t *m = mi->def.method; + if (jl_is_method(m)) { + if (jl_object_in_image((jl_value_t*)m->module)) { + if (ptrhash_get(&external_mis, mi) == HT_NOTFOUND) { + int found = has_backedge_to_worklist(mi, &visited, 1); + assert(found == 0 || found == 1); + if (found == 1) { + ptrhash_put(&external_mis, mi, mi); + jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci); + } + } + } + } + } + htable_free(&visited); + JL_GC_POP(); + return new_specializations; +} + +// New roots for external methods +static void jl_collect_methods(htable_t *mset, jl_array_t *new_specializations) +{ + size_t i, l = new_specializations ? 
jl_array_len(new_specializations) : 0; + jl_value_t *v; + jl_method_t *m; + for (i = 0; i < l; i++) { + v = jl_array_ptr_ref(new_specializations, i); + assert(jl_is_code_instance(v)); + m = ((jl_code_instance_t*)v)->def->def.method; + assert(jl_is_method(m)); + ptrhash_put(mset, (void*)m, (void*)m); + } +} + +static void jl_collect_new_roots(jl_array_t *roots, htable_t *mset, uint64_t key) +{ + size_t i, sz = mset->size; + int nwithkey; + jl_method_t *m; + void **table = mset->table; + jl_array_t *newroots = NULL; + JL_GC_PUSH1(&newroots); + for (i = 0; i < sz; i += 2) { + if (table[i+1] != HT_NOTFOUND) { + m = (jl_method_t*)table[i]; + assert(jl_is_method(m)); + nwithkey = nroots_with_key(m, key); + if (nwithkey) { + jl_array_ptr_1d_push(roots, (jl_value_t*)m); + newroots = jl_alloc_vec_any(nwithkey); + jl_array_ptr_1d_push(roots, (jl_value_t*)newroots); + rle_iter_state rootiter = rle_iter_init(0); + uint64_t *rletable = NULL; + size_t nblocks2 = 0, nroots = jl_array_len(m->roots), k = 0; + if (m->root_blocks) { + rletable = (uint64_t*)jl_array_data(m->root_blocks); + nblocks2 = jl_array_len(m->root_blocks); + } + while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) + if (rootiter.key == key) + jl_array_ptr_set(newroots, k++, jl_array_ptr_ref(m->roots, rootiter.i)); + assert(k == nwithkey); + } + } + } + JL_GC_POP(); +} + +// Create the forward-edge map (caller => callees) +// the intent of these functions is to invert the backedges tree +// for anything that points to a method not part of the worklist +// +// from MethodTables +static void jl_collect_missing_backedges(jl_methtable_t *mt) +{ + jl_array_t *backedges = mt->backedges; + if (backedges) { + size_t i, l = jl_array_len(backedges); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); + jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee + jl_array_t *edges = 
(jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, NULL); + jl_array_ptr_1d_push(edges, missing_callee); + } + } +} + + +// from MethodInstances +static void collect_backedges(jl_method_instance_t *callee, int internal) +{ + jl_array_t *backedges = callee->backedges; + if (backedges) { + size_t i = 0, l = jl_array_len(backedges); + while (i < l) { + jl_value_t *invokeTypes; + jl_method_instance_t *caller; + i = get_next_edge(backedges, i, &invokeTypes, &caller); + jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, invokeTypes); + jl_array_ptr_1d_push(edges, (jl_value_t*)callee); + } + } +} + + +// For functions owned by modules not on the worklist, call this on each method. +// - if the method is owned by a worklist module, add it to the list of things to be +// fully serialized +// - Collect all backedges (may be needed later when we invert this list). 
+static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) +{ + jl_array_t *s = (jl_array_t*)closure; + jl_method_t *m = ml->func.method; + if (s && !jl_object_in_image((jl_value_t*)m->module)) { + jl_array_ptr_1d_push(s, (jl_value_t*)m); + } + jl_svec_t *specializations = m->specializations; + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); + if ((jl_value_t*)callee != jl_nothing) + collect_backedges(callee, !s); + } + return 1; +} + +static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) +{ + jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s); +} + +// Collect methods of external functions defined by modules in the worklist +// "extext" = "extending external" +// Also collect relevant backedges +static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) +{ + if (s && !jl_object_in_image((jl_value_t*)m)) + s = NULL; // do not collect any methods + size_t i; + void **table = m->bindings.table; + for (i = 1; i < m->bindings.size; i += 2) { + if (table[i] != HT_NOTFOUND) { + jl_binding_t *b = (jl_binding_t*)table[i]; + if (b->owner == m && b->value && b->constp) { + jl_value_t *bv = jl_unwrap_unionall(b->value); + if (jl_is_datatype(bv)) { + jl_typename_t *tn = ((jl_datatype_t*)bv)->name; + if (tn->module == m && tn->name == b->name && tn->wrapper == b->value) { + jl_methtable_t *mt = tn->mt; + if (mt != NULL && + (jl_value_t*)mt != jl_nothing && + (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) { + assert(mt->module == tn->module); + jl_collect_methtable_from_mod(s, mt); + if (s) + jl_collect_missing_backedges(mt); + } + } + } + else if (jl_is_module(b->value)) { + jl_module_t *child = (jl_module_t*)b->value; + if (child != m && child->parent == m && child->name == b->name) { + // this is the original/primary binding for the submodule + 
jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value); + } + } + else if (jl_is_mtable(b->value)) { + jl_methtable_t *mt = (jl_methtable_t*)b->value; + if (mt->module == m && mt->name == b->name) { + // this is probably an external method table, so let's assume so + // as there is no way to precisely distinguish them, + // and the rest of this serializer does not bother + // to handle any method tables specially + jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv); + } + } + } + } + } +} + +static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) +{ + jl_array_t *callees = NULL; + JL_GC_PUSH2(&caller, &callees); + callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); + if (callees != NULL) { + jl_array_ptr_1d_push(edges, (jl_value_t*)caller); + jl_array_ptr_1d_push(edges, (jl_value_t*)callees); + size_t i, l = jl_array_len(callees); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); + if (c && jl_is_method_instance(c)) { + arraylist_push(wq, c); + } + } + } + JL_GC_POP(); +} + + +// Extract `edges` and `ext_targets` from `edges_map` +// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges +// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target +// `edges_map` is consumed: it is set to NULL once the reachable callers are recorded. +// Each `targets_indexes` array holds a count, then that many 0-based indexes of +// triples in `ext_targets`, then the pair indexes (in `edges`) of callees that are +// themselves listed as callers. The entry is NULL when a target lookup failed. +static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + arraylist_t wq; + arraylist_new(&wq, 0); + void **table = (void**)jl_array_data(edges_map); // edges_map is caller => callees + size_t table_size = jl_array_len(edges_map); + for (size_t i = 0; i < table_size; i += 2) { + assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && + "edges_map changed during iteration"); + jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; + jl_array_t *callees = (jl_array_t*)table[i + 1]; + if (callees == NULL) + continue; +
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + if (!jl_object_in_image((jl_value_t*)caller->def.method->module) || + method_instance_in_queue(caller)) { + jl_record_edges(caller, &wq, edges); + } + } + while (wq.len) { + jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); + jl_record_edges(caller, &wq, edges); + } + arraylist_free(&wq); + edges_map = NULL; + htable_t edges_map2; + htable_new(&edges_map2, 0); + htable_t edges_ids; + size_t l = edges ? jl_array_len(edges) : 0; + htable_new(&edges_ids, l); + for (size_t i = 0; i < l / 2; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); + void *target = (void*)((char*)HT_NOTFOUND + i + 1); + ptrhash_put(&edges_ids, (void*)caller, target); + } + // process target list to turn it into a memoized validity table + // and compute the old methods list, ready for serialization + jl_value_t *matches = NULL; + jl_array_t *callee_ids = NULL; + JL_GC_PUSH2(&matches, &callee_ids); + for (size_t i = 0; i < l; i += 2) { + jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); + size_t l = jl_array_len(callees); + callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + idxs[0] = 0; + size_t nt = 0; + for (size_t j = 0; j < l; j += 2) { + jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + assert(callee && "unsupported edge"); + + if (jl_is_method_instance(callee)) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if (!jl_object_in_image((jl_value_t*)mt->module)) + continue; + } + + // (nullptr, c) => call + // (invokeTypes, c) => invoke + // (nullptr, invokeTypes) => missing call + // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) + // edges_map2 memoizes only the plain (nullptr, x) edges, keyed by x. + // Invoke edges always get a fresh target: their result depends on + // invokeTypes as well as on the callee, so one pointer key cannot name them. + void *target = invokeTypes ? HT_NOTFOUND :
ptrhash_get(&edges_map2, (void*)callee); + if (target == HT_NOTFOUND) { + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokeTypes) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if ((jl_value_t*)mt == jl_nothing) { + callee_ids = NULL; // invalid + break; + } + else { + matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches == jl_nothing) { + callee_ids = NULL; // invalid + break; + } + matches = (jl_value_t*)((jl_method_match_t*)matches)->method; + } + } + else { + jl_value_t *sig; + if (jl_is_method_instance(callee)) + sig = ((jl_method_instance_t*)callee)->specTypes; + else + sig = callee; + int ambig = 0; + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + -1, 0, world, &min_valid, &max_valid, &ambig); + if (matches == jl_false) { + callee_ids = NULL; // invalid + break; + } + size_t k; + for (k = 0; k < jl_array_len(matches); k++) { + jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); + jl_array_ptr_set(matches, k, match->method); + } + } + jl_array_ptr_1d_push(ext_targets, invokeTypes); + jl_array_ptr_1d_push(ext_targets, callee); + jl_array_ptr_1d_push(ext_targets, matches); + target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); + // store under the same key the lookup above uses, and keep invoke targets + // out of the memo so a later plain call to `callee` cannot reuse them + if (!invokeTypes) + ptrhash_put(&edges_map2, (void*)callee, target); + } + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } + jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids + if (!callee_ids) + continue; + idxs[0] = nt; + // record place of every method in edges + // add method edges to the callee_ids list + for (size_t j = 0; j < l; j += 2) { + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + if (callee && jl_is_method_instance(callee)) { + void *target = ptrhash_get(&edges_ids, (void*)callee); + if (target != HT_NOTFOUND) { + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } + } + } + jl_array_del_end(callee_ids, l - nt); + } +
JL_GC_POP(); + htable_free(&edges_map2); + htable_free(&edges_ids); // release the caller => pair-index table as well +} + +// Headers + +// serialize information about all loaded modules +static void write_mod_list(ios_t *s, jl_array_t *a) +{ + size_t i; + size_t len = jl_array_len(a); + for (i = 0; i < len; i++) { + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); + assert(jl_is_module(m)); + if (jl_object_in_image((jl_value_t*)m)) { + const char *modname = jl_symbol_name(m->name); + size_t l = strlen(modname); + write_int32(s, l); + ios_write(s, modname, l); + write_uint64(s, m->uuid.hi); + write_uint64(s, m->uuid.lo); + write_uint64(s, m->build_id.hi); + write_uint64(s, m->build_id.lo); + } + } + write_int32(s, 0); +} + +// "magic" string and version header of .ji file +static const int JI_FORMAT_VERSION = 12; +static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature +static const uint16_t BOM = 0xFEFF; // byte-order marker +static void write_header(ios_t *s) +{ + ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); + write_uint16(s, JI_FORMAT_VERSION); + ios_write(s, (char *) &BOM, 2); + write_uint8(s, sizeof(void*)); + ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1); + ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1); + ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1); + const char *branch = jl_git_branch(), *commit = jl_git_commit(); + ios_write(s, branch, strlen(branch)+1); + ios_write(s, commit, strlen(commit)+1); + write_uint64(s, 0); // eventually will hold checksum for the content portion of this (build_id.hi) +} + +// serialize information about the result of deserializing this file +static void write_worklist_for_header(ios_t *s, jl_array_t *worklist) +{ + int i, l = jl_array_len(worklist); + for (i = 0; i < l; i++) { + jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + if (workmod->parent == jl_main_module || workmod->parent == workmod) { + size_t l = strlen(jl_symbol_name(workmod->name)); + write_int32(s, l); + ios_write(s, jl_symbol_name(workmod->name), l); +
write_uint64(s, workmod->uuid.hi); + write_uint64(s, workmod->uuid.lo); + write_uint64(s, workmod->build_id.lo); + } + } + write_int32(s, 0); +} + +static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT +{ + if (depmod->parent == jl_main_module || depmod->parent == depmod) + return; + const char *mname = jl_symbol_name(depmod->name); + size_t slen = strlen(mname); + write_module_path(s, depmod->parent); + write_int32(s, slen); + ios_write(s, mname, slen); +} + +// Cache file header +// Serialize the global Base._require_dependencies array of pathnames that +// are include dependencies. Also write Preferences and return +// the location of the srctext "pointer" in the header index. +static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t **udepsp) +{ + int64_t initial_pos = 0; + int64_t pos = 0; + static jl_array_t *deps = NULL; + if (!deps) + deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); + + // unique(deps) to eliminate duplicates while preserving order: + // we preserve order so that the topmost included .jl file comes first + static jl_value_t *unique_func = NULL; + if (!unique_func) + unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); + jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); + ct->world_age = last_age; + + // write a placeholder for total size so that we can quickly seek past all of the + // dependencies if we don't need them + initial_pos = ios_pos(s); + write_uint64(s, 0); + size_t i, l = udeps ? 
jl_array_len(udeps) : 0; + for (i = 0; i < l; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath + size_t slen = jl_string_len(dep); + write_int32(s, slen); + ios_write(s, jl_string_data(dep), slen); + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime + jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module + jl_module_t *depmod_top = depmod; + while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) + depmod_top = depmod_top->parent; + unsigned provides = 0; + size_t j, lj = jl_array_len(worklist); + for (j = 0; j < lj; j++) { + jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, j); + if (workmod->parent == jl_main_module || workmod->parent == workmod) { + ++provides; + if (workmod == depmod_top) { + write_int32(s, provides); + write_module_path(s, depmod); + break; + } + } + } + write_int32(s, 0); + } + write_int32(s, 0); // terminator, for ease of reading + + // Calculate Preferences hash for current package. 
+ jl_value_t *prefs_hash = NULL; + jl_value_t *prefs_list = NULL; + JL_GC_PUSH1(&prefs_list); + if (jl_base_module) { + // Toplevel module is the module we're currently compiling, use it to get our preferences hash + jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); + jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); + jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); + + if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { + // Temporary invoke in newest world age + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + + // call get_compiletime_prefs(__toplevel__) + jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; + prefs_list = (jl_value_t*)jl_apply(args, 2); + + // Call get_preferences_hash(__toplevel__, prefs_list) + args[0] = prefs_hash_func; + args[2] = prefs_list; + prefs_hash = (jl_value_t*)jl_apply(args, 3); + + // Reset world age to normal + ct->world_age = last_age; + } + } + + // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. + if (prefs_hash != NULL && prefs_list != NULL) { + size_t i, l = jl_array_len(prefs_list); + for (i = 0; i < l; i++) { + jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); + size_t slen = jl_string_len(pref_name); + write_int32(s, slen); + ios_write(s, jl_string_data(pref_name), slen); + } + write_int32(s, 0); // terminator + write_uint64(s, jl_unbox_uint64(prefs_hash)); + } + else { + // This is an error path, but let's at least generate a valid `.ji` file. + // We declare an empty list of preference names, followed by a zero-hash. + // The zero-hash is not what would be generated for an empty set of preferences, + // and so this `.ji` file will be invalidated by a future non-erroring pass + // through this function. 
+ write_int32(s, 0); + write_uint64(s, 0); + } + JL_GC_POP(); // for prefs_list + + // write a dummy file position to indicate the beginning of the source-text + pos = ios_pos(s); + ios_seek(s, initial_pos); + write_uint64(s, pos - initial_pos); + ios_seek(s, pos); + write_uint64(s, 0); + return pos; +} + + +// Deserialization + +// Add methods to external (non-worklist-owned) functions +static void jl_insert_methods(jl_array_t *list) +{ + size_t i, l = jl_array_len(list); + for (i = 0; i < l; i++) { + jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); + assert(jl_is_method(meth)); + assert(!meth->is_for_opaque_closure); + jl_methtable_t *mt = jl_method_get_table(meth); + assert((jl_value_t*)mt != jl_nothing); + jl_method_table_insert(mt, meth, NULL); + } +} + +static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) +{ + size_t i, l = jl_array_len(method_roots_list); + for (i = 0; i < l; i+=2) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i); + jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1); + if (roots) { + assert(jl_is_array(roots)); + jl_append_method_roots(m, key, roots); + } + } +} + +static int remove_code_instance_from_validation(jl_code_instance_t *codeinst) +{ + return ptrhash_remove(&new_code_instance_validate, codeinst); +} + +// verify that these edges intersect with the same methods as before +static jl_array_t *jl_verify_edges(jl_array_t *targets) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i, l = jl_array_len(targets) / 3; + jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); + memset(jl_array_data(valids), 1, l); + jl_value_t *loctag = NULL; + jl_value_t *matches = NULL; + JL_GC_PUSH3(&valids, &matches, &loctag); + for (i = 0; i < l; i++) { + jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); + jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); + jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); + int valid = 
1; + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokesig) { + assert(callee && "unsupported edge"); + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if ((jl_value_t*)mt == jl_nothing) { + valid = 0; + } + else { + matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches == jl_nothing) { + valid = 0; + } + else { + matches = (jl_value_t*)((jl_method_match_t*)matches)->method; + if (matches != expected) { + valid = 0; + } + } + } + } + else { + jl_value_t *sig; + if (jl_is_method_instance(callee)) + sig = ((jl_method_instance_t*)callee)->specTypes; + else + sig = callee; + assert(jl_is_array(expected)); + int ambig = 0; + // TODO: possibly need to included ambiguities too (for the optimizer correctness)? + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + -1, 0, world, &min_valid, &max_valid, &ambig); + if (matches == jl_false) { + valid = 0; + } + else { + // setdiff!(matches, expected) + size_t j, k, ins = 0; + if (jl_array_len(matches) != jl_array_len(expected)) { + valid = 0; + } + for (k = 0; k < jl_array_len(matches); k++) { + jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; + size_t l = jl_array_len(expected); + for (j = 0; j < l; j++) + if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) + break; + if (j == l) { + // intersection has a new method or a method was + // deleted--this is now probably no good, just invalidate + // everything about it now + valid = 0; + if (!_jl_debug_method_invalidation) + break; + jl_array_ptr_set(matches, ins++, match); + } + } + if (!valid && _jl_debug_method_invalidation) + jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); + } + } + jl_array_uint8_set(valids, i, valid); + if (!valid && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? 
(jl_value_t*)invokesig : callee); + loctag = jl_cstr_to_string("insert_backedges_callee"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + loctag = jl_box_int32((int32_t)i); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); + //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr); + } + JL_GC_POP(); + return valids; +} + +// Combine all edges relevant to a method into the visited table +// `visited` is initialized here; for every caller in `edges` it receives +// HT_NOTFOUND + 1 (invalid) or HT_NOTFOUND + 2 (need to scan). +// `valids` is indexed by the target indexes stored in each caller's id array. +static void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited) +{ + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + size_t i, l = jl_array_len(edges) / 2; + htable_new(visited, l); + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + int valid = 1; + if (callee_ids == NULL) { + // serializing the edges had failed + valid = 0; + } + else { + // only inspect the type tag once NULL has been ruled out + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + size_t j; + for (j = 0; valid && j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + valid = jl_array_uint8_ref(valids, idx); + if (!valid && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + loctag = jl_box_int32((int32_t)idx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + } + } + } + ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1)); + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts(valid ?
"valid\n" : "INVALID\n", ios_stderr); + // HT_NOTFOUND: valid (no invalid edges) + // HT_NOTFOUND + 1: invalid + // HT_NOTFOUND + 2: need to scan + // HT_NOTFOUND + 3 + depth: in-progress + } + JL_GC_POP(); +} + + +// Propagate the result of cycle-resolution to all edges (recursively) +static int mark_edges_in_worklist(jl_array_t *edges, int idx, jl_method_instance_t *cycle, htable_t *visited, int found) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); + int oldfound = (char*)ptrhash_get(visited, caller) - (char*)HT_NOTFOUND; + if (oldfound < 3) + return 0; // not in-progress + if (!found) { + ptrhash_remove(visited, (void*)caller); + } + else { + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); + } + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + size_t i, badidx = 0, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + if (mark_edges_in_worklist(edges, idxs[i], cycle, visited, found) && badidx == 0) + badidx = i - idxs[0]; + } + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_method_instance_t *callee = cycle; + if (badidx--) + callee = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * badidx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); + JL_GC_POP(); + } + return 1; +} + + +// Visit the entire call graph, starting from edges[idx] to determine if that method is valid +static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, int depth) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); +
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND; + if (found == 0) + return 1; // valid + if (found == 1) + return 0; // invalid + if (found != 2) + return found - 1; // depth + found = 0; + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + int cycle = 0; + size_t i, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + int32_t idx = idxs[i]; + int child_found = jl_verify_graph_edge(edges, idx, visited, depth + 1); + if (child_found == 0) { + found = 1; + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_array_ptr_ref(edges, idx * 2)); + JL_GC_POP(); + } + break; + } + else if (child_found >= 2 && child_found - 2 < cycle) { + // record the cycle will resolve at depth "cycle" + cycle = child_found - 2; + assert(cycle); + } + } + if (!found) { + if (cycle && cycle != depth) + return cycle + 2; + ptrhash_remove(visited, (void*)caller); + } + else { // found invalid + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); + } + if (cycle) { + // If we are the top of the current cycle, now mark all other parts of + // our cycle by re-walking the backedges graph and marking all WIP + // items as found. + // Be careful to only re-walk as far as we had originally scanned above. + // Or if we found a backedge, also mark all of the other parts of the + // cycle as also having an backedge. 
+ n = i; + for (i = idxs[0] + 1; i < n; i++) { + mark_edges_in_worklist(edges, idxs[i], caller, visited, found); + } + } + return found ? 0 : 1; +} + +// Visit all entries in edges, verify if they are valid +static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited) +{ + size_t i, n = jl_array_len(edges) / 2; + jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n); + JL_GC_PUSH1(&valids); + int8_t *valids_data = (int8_t*)jl_array_data(valids); + for (i = 0; i < n; i++) { + valids_data[i] = jl_verify_graph_edge(edges, i, visited, 1); + } + JL_GC_POP(); + return valids; +} + +// Restore backedges to external targets +// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. +// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. +static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list) +{ + // determine which CodeInstance objects are still valid in our image + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_array_t *valids = jl_verify_edges(ext_targets); + JL_GC_PUSH1(&valids); + htable_t visited; + htable_new(&visited, 0); + jl_verify_methods(edges, valids, &visited); + valids = jl_verify_graph(edges, &visited); + size_t i, l = jl_array_len(edges) / 2; + + // next build a map from external MethodInstances to their CodeInstance for insertion + if (ci_list == NULL) { + htable_reset(&visited, 0); + } + else { + size_t i, l = jl_array_len(ci_list); + htable_reset(&visited, l); + for (i = 0; i < l; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i); + assert(ptrhash_get(&visited, (void*)ci->def) == HT_NOTFOUND); // check that we don't have multiple cis for same mi + ptrhash_put(&visited, (void*)ci->def, (void*)ci); + } + } + + // next disable any invalid codes, so we do not try to enable them + for (i = 0; i < l; i++) { + jl_method_instance_t 
*caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + int valid = jl_array_uint8_ref(valids, i); + if (valid) + continue; + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + assert(jl_is_code_instance(ci)); + remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled + } + else { + jl_code_instance_t *codeinst = caller->cache; + while (codeinst) { + remove_code_instance_from_validation(codeinst); // should be left invalid + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + } + } + + // finally enable any applicable new codes + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + int valid = jl_array_uint8_ref(valids, i); + if (!valid) + continue; + // if this callee is still valid, add all the backedges + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + for (size_t j = 0; j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); + jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); + if (callee && jl_is_method_instance(callee)) { + jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); + } + else { + jl_value_t *sig = callee == NULL ? invokesig : callee; + jl_methtable_t *mt = jl_method_table_for(sig); + // FIXME: rarely, `callee` has an unexpected `Union` signature, + // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 + // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` + // This workaround exposes us to (rare) 265-violations. 
+ if ((jl_value_t*)mt != jl_nothing) + jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); + } + } + // then enable it + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + // have some new external code to use + assert(jl_is_code_instance(ci)); + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + remove_code_instance_from_validation(codeinst); // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, codeinst); + } + } + else { + jl_code_instance_t *codeinst = caller->cache; + while (codeinst) { + if (remove_code_instance_from_validation(codeinst)) { // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + } + } + + htable_free(&visited); + JL_GC_POP(); +} + +static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges) +{ + size_t l = edges ? 
jl_array_len(edges) / 2 : 0; + for (size_t i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + ptrhash_put(callers_with_edges, (void*)caller, (void*)caller); + } +} + +static void validate_new_code_instances(void) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i; + for (i = 0; i < new_code_instance_validate.size; i += 2) { + if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { + //assert(0 && "unexpected unprocessed CodeInstance found"); + jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i]; + JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it) + assert(ci->min_world >= world && ci->inferred); + ci->max_world = ~(size_t)0; + jl_method_instance_t *caller = ci->def; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, ci); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts("FREE\n", ios_stderr); + } + } +} + +static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods) +{ + if (!jl_main_module->build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Main module uuid state is invalid for module deserialization."); + } + size_t i, l = jl_array_len(depmods); + for (i = 0; ; i++) { + size_t len = read_int32(s); + if (len == 0 && i == l) + return NULL; // success + if (len == 0 || i == l) + return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); + char *name = (char*)alloca(len + 1); + ios_readall(s, name, len); + name[len] = '\0'; + jl_uuid_t uuid; + uuid.hi = read_uint64(s); + uuid.lo = read_uint64(s); + jl_uuid_t build_id; + build_id.hi = read_uint64(s); + build_id.lo = read_uint64(s); + jl_sym_t *sym = _jl_symbol(name, len); + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(depmods, i); + if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym 
|| + m->build_id.hi != build_id.hi || m->build_id.lo != build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Invalid input in module list: expected %s.", name); + } + } +} + +static int readstr_verify(ios_t *s, const char *str, int include_null) +{ + size_t i, len = strlen(str) + include_null; + for (i = 0; i < len; ++i) + if ((char)read_uint8(s) != str[i]) + return 0; + return 1; +} + +JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s) +{ + uint16_t bom; + if (readstr_verify(s, JI_MAGIC, 0) && + read_uint16(s) == JI_FORMAT_VERSION && + ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && + read_uint8(s) == sizeof(void*) && + readstr_verify(s, JL_BUILD_UNAME, 1) && + readstr_verify(s, JL_BUILD_ARCH, 1) && + readstr_verify(s, JULIA_VERSION_STRING, 1) && + readstr_verify(s, jl_git_branch(), 1) && + readstr_verify(s, jl_git_commit(), 1)) + return read_uint64(s); + return 0; +} diff --git a/src/subtype.c b/src/subtype.c index 9a5a9fdbbbfd48..cbb11520190cbf 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -1289,8 +1289,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) return issub; } while (xd != jl_any_type && xd->name != yd->name) { - if (xd->super == NULL) + if (xd->super == NULL) { + assert(xd->parameters && jl_is_typename(xd->name)); jl_errorf("circular type parameter constraint in definition of %s", jl_symbol_name(xd->name->name)); + } xd = xd->super; } if (xd == jl_any_type) return 0; diff --git a/src/support/arraylist.h b/src/support/arraylist.h index 03bfd45f8f525d..6ad2f0e2f28c9e 100644 --- a/src/support/arraylist.h +++ b/src/support/arraylist.h @@ -25,7 +25,7 @@ void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; -void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; typedef struct { uint32_t len; diff --git 
a/src/support/rle.h b/src/support/rle.h index f85d9f35c4b803..bd2fdafc0f79f6 100644 --- a/src/support/rle.h +++ b/src/support/rle.h @@ -10,6 +10,7 @@ extern "C" { #include #include #include +#include "analyzer_annotations.h" /* Run-length encoding (RLE) utilities */ /* In the RLE table, even indexes encode the key (the item classification), odd indexes encode the item index */ @@ -28,8 +29,8 @@ typedef struct _rle_iter_state_t { uint64_t key; // current identifier } rle_iter_state; -rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0); -int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs); +rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0) JL_NOTSAFEPOINT; +int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs) JL_NOTSAFEPOINT; /* indexing */ typedef struct { @@ -37,8 +38,8 @@ typedef struct { int index; // number of preceding items in the list with the same key } rle_reference; -void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0); -size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0); +void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; +size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; #ifdef __cplusplus diff --git a/src/threading.c b/src/threading.c index e33d22c24581a5..dcb57cce23a794 100644 --- a/src/threading.c +++ b/src/threading.c @@ -291,8 +291,10 @@ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) { 
+#ifndef __clang_gcanalyzer__ if (jl_get_pgcstack_cb == jl_get_pgcstack_init) jl_get_pgcstack_init(); +#endif // for codegen *f = jl_get_pgcstack_cb; *k = jl_pgcstack_key; diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl index d14d740fc5e5b2..80653353a7c172 100644 --- a/stdlib/LLD_jll/src/LLD_jll.jl +++ b/stdlib/LLD_jll/src/LLD_jll.jl @@ -1,4 +1,3 @@ - # This file is a part of Julia. License is MIT: https://julialang.org/license ## dummy stub for https://github.com/JuliaBinaryWrappers/LLD_jll.jl diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index 2bf06550b72d6f..1a52c1ec782dea 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -144,9 +144,13 @@ end const BacktraceCache = Dict{BTElement,Vector{StackFrame}} # copied from julia_internal.h -const JL_BUFF_TAG = UInt(0x4eadc000) +JL_BUFF_TAG::UInt = ccall(:jl_get_buff_tag, UInt, ()) const JL_GC_UNKNOWN_TYPE_TAG = UInt(0xdeadaa03) +function __init__() + global JL_BUFF_TAG = ccall(:jl_get_buff_tag, UInt, ()) +end + struct CorruptType end struct BufferType end struct UnknownType end diff --git a/test/precompile.jl b/test/precompile.jl index 5b49ad4a3b31a3..eaf755046d3662 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license original_depot_path = copy(Base.DEPOT_PATH) +original_load_path = copy(Base.LOAD_PATH) using Test, Distributed, Random @@ -37,7 +38,7 @@ end # method root provenance -rootid(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), Base.parentmodule(m)) +rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64 rootid(m::Method) = rootid(m.module) function root_provenance(m::Method, i::Int) @@ -344,7 +345,7 @@ precompile_test_harness(false) do dir modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) - @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) ] + @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)), Base.PkgId(Foo) => Base.PkgId(Foo2), @@ -1554,8 +1555,23 @@ precompile_test_harness("issue #46296") do load_path (@eval (using CodeInstancePrecompile)) end -empty!(Base.DEPOT_PATH) -append!(Base.DEPOT_PATH, original_depot_path) +precompile_test_harness("Recursive types") do load_path + write(joinpath(load_path, "RecursiveTypeDef.jl"), + """ + module RecursiveTypeDef + + struct C{T,O} end + struct A{T,N,O} <: AbstractArray{C{T,A{T,N,O}},N} + sz::NTuple{N,Int} + end + + end + """) + Base.compilecache(Base.PkgId("RecursiveTypeDef")) + (@eval (using RecursiveTypeDef)) + a = Base.invokelatest(RecursiveTypeDef.A{Float64,2,String}, (3, 3)) + @test isa(a, AbstractArray) +end @testset "issue 46778" begin f46778(::Any, ::Type{Int}) = 1 @@ -1563,3 +1579,8 @@ append!(Base.DEPOT_PATH, original_depot_path) @test precompile(Tuple{typeof(f46778), Int, DataType}) @test which(f46778, Tuple{Any,DataType}).specializations[1].cache.invoke != C_NULL end + +empty!(Base.DEPOT_PATH) +append!(Base.DEPOT_PATH, original_depot_path) 
+empty!(Base.LOAD_PATH) +append!(Base.LOAD_PATH, original_load_path)