Skip to content

Commit

Permalink
Replace the .ji serialization with sysimage format
Browse files Browse the repository at this point in the history
This unifies two serializers, `dump.c` (used for packages)
and `staticdata.c` (used for system images). It adopts the
`staticdata` strategy, adding support for external linkage,
uniquing of MethodInstances & types, method extensions,
external specializations, and invalidation. This lays the
groundwork for native code caching as done with system images.

Co-authored-by: Valentin Churavy <[email protected]>
Co-authored-by: Jameson Nash <[email protected]>
Co-authored-by: Tim Holy <[email protected]>
  • Loading branch information
3 people committed Nov 28, 2022
1 parent 5495b8d commit faab0de
Show file tree
Hide file tree
Showing 39 changed files with 3,469 additions and 4,434 deletions.
8 changes: 4 additions & 4 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Tracking of newly-inferred CodeInstances during precompilation.
# `track_newly_inferred` switches the tracking on; while it is set, `cache_result!`
# pushes each freshly cached CodeInstance onto `newly_inferred`.
const track_newly_inferred = RefValue{Bool}(false)
const newly_inferred = CodeInstance[]

# build (and start inferring) the inference frame for the top-level MethodInstance
function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
Expand Down Expand Up @@ -403,11 +403,11 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
# TODO: also don't store inferred code if we've previously decided to interpret this function
if !already_inferred
inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds)
if track_newly_inferred[]
m = linfo.def
if isa(m, Method) && m.module != Core
ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo)
push!(newly_inferred, ci)
end
end
end
Expand Down
83 changes: 48 additions & 35 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any}
end

@debug "Loading cache file $path for $pkg"
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods)
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false)
if isa(sv, Exception)
return sv
end
Expand Down Expand Up @@ -973,7 +973,7 @@ function run_package_callbacks(modkey::PkgId)
end

# loads a precompile cache file, after checking stale_cachefile tests
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64)
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
assert_havelock(require_lock)
loaded = nothing
if root_module_exists(modkey)
Expand Down Expand Up @@ -1021,7 +1021,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St
for i in 1:length(depmods)
dep = depmods[i]
dep isa Module && continue
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt64}
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
@assert root_module_exists(depkey)
dep = root_module(depkey)
depmods[i] = dep
Expand Down Expand Up @@ -1052,7 +1052,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String)
local depmodnames
io = open(path, "r")
try
isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.")
iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.")
depmodnames = parse_cache_header(io)[3]
isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.")
finally
Expand All @@ -1074,7 +1074,7 @@ end

# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it
# returns the set of modules restored if the cache load succeeded
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt64)
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128)
assert_havelock(require_lock)
paths = find_all_in_cache_path(pkg)
for path_to_try in paths::Vector{String}
Expand All @@ -1087,7 +1087,7 @@ end
for i in 1:length(staledeps)
dep = staledeps[i]
dep isa Module && continue
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt64}
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
modpaths = find_all_in_cache_path(modkey)
modfound = false
for modpath_to_try in modpaths::Vector{String}
Expand All @@ -1101,7 +1101,7 @@ end
break
end
if !modfound
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $modbuild_id is missing from the cache."
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
staledeps = true
break
end
Expand Down Expand Up @@ -1153,7 +1153,7 @@ const package_callbacks = Any[]
const include_callbacks = Any[]

# used to optionally track dependencies when requiring a module:
const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
function _include_dependency(mod::Module, _path::AbstractString)
Expand Down Expand Up @@ -1406,7 +1406,7 @@ function _require(pkg::PkgId, env=nothing)

# attempt to load the module file via the precompile cache locations
if JLOptions().use_compiled_modules != 0
m = _require_search_from_serialized(pkg, path, UInt64(0))
m = _require_search_from_serialized(pkg, path, UInt128(0))
if m isa Module
return m
end
Expand All @@ -1416,7 +1416,7 @@ function _require(pkg::PkgId, env=nothing)
# but it was not handled by the precompile loader, complain
for (concrete_pkg, concrete_build_id) in _concrete_dependencies
if pkg == concrete_pkg
@warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache.
@warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
This may mean $pkg does not support precompilation but is imported by a module that does."""
if JLOptions().incremental != 0
# during incremental precompilation, this should be fail-fast
Expand Down Expand Up @@ -1785,9 +1785,13 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
close(tmpio)
p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout)
if success(p)
# append checksum to the end of the .ji file:
open(tmppath, "a+") do f
write(f, _crc32c(seekstart(f)))
# append extra crc to the end of the .ji file:
open(tmppath, "r+") do f
if iszero(isvalid_cache_header(f))
error("Invalid header for $pkg in new cache file $(repr(tmppath)).")
end
seekstart(f)
write(f, _crc32c(f))
end
# inherit permission from the source file (and make them writable)
chmod(tmppath, filemode(path) & 0o777 | 0o200)
Expand All @@ -1807,7 +1811,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
end
end

# this is atomic according to POSIX:
# this is atomic according to POSIX (not Win32):
rename(tmppath, cachefile; force=true)
return cachefile
end
Expand All @@ -1817,13 +1821,16 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
if p.exitcode == 125
return PrecompilableError()
else
error("Failed to precompile $pkg to $tmppath.")
error("Failed to precompile $pkg to $(repr(tmppath)).")
end
end

# Return the build id of module `m` as a single `UInt128`.
# The runtime reports the id as two 64-bit words (high word first); they are
# combined here so callers can compare build ids with one integer comparison.
function module_build_id(m::Module)
    hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
    return (UInt128(hi) << 64) | lo
end

# Verify the cache-file header at the current position of `f`.
# Returns the checksum id reported by `jl_read_verify_header`, or zero when the
# header is invalid. The result is a `UInt64`, not a `Bool`: test it with `iszero`.
isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero
# Check the CRC-32c stored in the final 4 bytes of `f` against the checksum of
# everything that precedes it. Rewinds `f` before checksumming.
function isvalid_file_crc(f::IOStream)
    computed = _crc32c(seekstart(f), filesize(f) - 4)
    stored = read(f, UInt32)
    return computed == stored
end

struct CacheHeaderIncludes
Expand Down Expand Up @@ -1897,13 +1904,14 @@ function parse_cache_header(f::IO)
totbytes -= 8
@assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))"
# read the list of modules that are required to be present during loading
required_modules = Vector{Pair{PkgId, UInt64}}()
required_modules = Vector{Pair{PkgId, UInt128}}()
while true
n = read(f, Int32)
n == 0 && break
sym = String(read(f, n)) # module name
uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
build_id = read(f, UInt64) # build id
build_id = UInt128(read(f, UInt64)) << 64
build_id |= read(f, UInt64)
push!(required_modules, PkgId(uuid, sym) => build_id)
end
return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
Expand All @@ -1912,29 +1920,29 @@ end
# Open `cachefile`, validate its header and return the tuple produced by
# `parse_cache_header(::IO)`.
#
# With `srcfiles_only=true`, the `includes` list inside the returned tuple is
# filtered in place so that it only keeps entries whose `filename` is among the
# files reported by `srctext_files`.
#
# Throws an `ArgumentError` when the header is invalid.
function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
    io = open(cachefile, "r")
    try
        # `isvalid_cache_header` yields a checksum (zero means invalid), so it is
        # tested with `iszero` and evaluated only once for this stream.
        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
        ret = parse_cache_header(io)
        srcfiles_only || return ret
        _, (includes, _), _, srctextpos, _... = ret
        srcfiles = srctext_files(io, srctextpos)
        # `includes` aliases the vector stored in `ret`, so filtering it in place
        # is what makes the returned tuple reflect the restriction.
        filter!(chi -> chi.filename ∈ srcfiles, includes)
        return ret
    finally
        close(io)
    end
end



# Return the preferences hash stored in the cache header read from `f`
# (the sixth element of the tuple returned by `parse_cache_header`).
preferences_hash(f::IO) = parse_cache_header(f)[6]
function preferences_hash(cachefile::String)
io = open(cachefile, "r")
try
if !isvalid_cache_header(io)
if iszero(isvalid_cache_header(io))
throw(ArgumentError("Invalid header in cache file $cachefile."))
end
return preferences_hash(io)
Expand All @@ -1945,22 +1953,22 @@ end


# Return the required modules recorded in the cache header read from `f`,
# together with a `(filename, mtime)` tuple for every included file.
function cache_dependencies(f::IO)
    # Parse the header exactly once; only the includes and the required-module
    # list are needed here.
    _, (includes, _), modules, _... = parse_cache_header(f)
    return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime
end

# Open `cachefile`, validate its header and return its dependencies as computed
# by `cache_dependencies(::IO)`. Throws an `ArgumentError` on an invalid header.
function cache_dependencies(cachefile::String)
    io = open(cachefile, "r")
    try
        # The header check returns a checksum (zero means invalid); test it once
        # with `iszero` rather than negating it.
        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
        return cache_dependencies(io)
    finally
        close(io)
    end
end

function read_dependency_src(io::IO, filename::AbstractString)
modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io)
srctextpos = parse_cache_header(io)[4]
srctextpos == 0 && error("no source-text stored in cache file")
seek(io, srctextpos)
return _read_dependency_src(io, filename)
Expand All @@ -1983,7 +1991,7 @@ end
function read_dependency_src(cachefile::String, filename::AbstractString)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return read_dependency_src(io, filename)
finally
close(io)
Expand Down Expand Up @@ -2173,12 +2181,13 @@ get_compiletime_preferences(::Nothing) = String[]
# returns true if "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
# otherwise returns the list of dependencies to also check
# Convenience method: check `cachefile` against `modpath` without requiring a
# particular package identity or build id. A zero `build_id` makes the full
# method skip its build-id comparison.
@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
    return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded)
end
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt64, modpath::String, cachefile::String; ignore_loaded::Bool = false)
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false)
io = open(cachefile, "r")
try
if !isvalid_cache_header(io)
checksum = isvalid_cache_header(io)
if iszero(checksum)
@debug "Rejecting cache file $cachefile due to it containing an invalid cache header"
return true # invalid cache file
end
Expand All @@ -2191,9 +2200,12 @@ end
@debug "Rejecting cache file $cachefile for $modkey since it is for $id instead"
return true
end
if build_id != UInt64(0) && id.second != build_id
@debug "Ignoring cache file $cachefile for $modkey since it is does not provide desired build_id"
return true
if build_id != UInt128(0)
id_build = (UInt128(checksum) << 64) | id.second
if id_build != build_id
@debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))"
return true
end
end
id = id.first
modules = Dict{PkgId, UInt64}(modules)
Expand Down Expand Up @@ -2233,11 +2245,12 @@ end
for (req_key, req_build_id) in _concrete_dependencies
build_id = get(modules, req_key, UInt64(0))
if build_id !== UInt64(0)
build_id |= UInt128(checksum) << 64
if build_id === req_build_id
skip_timecheck = true
break
end
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $build_id) for $req_key (want $req_build_id)"
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
return true # cachefile doesn't provide the required version of the dependency
end
end
Expand Down
2 changes: 1 addition & 1 deletion deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,8 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ)
endif

$(eval $(call bb-install,llvm,LLVM,false,true))
$(eval $(call bb-install,clang,CLANG,false,true))
$(eval $(call bb-install,lld,LLD,false,true))
$(eval $(call bb-install,clang,CLANG,false,true))
$(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))

endif # USE_BINARYBUILDER_LLVM
Expand Down
7 changes: 3 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ endif

SRCS := \
jltypes gf typemap smallintset ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
dlload sys init task array staticdata toplevel jl_uv datatype \
simplevector runtime_intrinsics precompile jloptions \
threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
Expand Down Expand Up @@ -291,7 +291,6 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
Expand All @@ -317,7 +316,7 @@ $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj:
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h
$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
Expand Down Expand Up @@ -453,7 +452,7 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana
SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult"
SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core"
# these need to be annotated (and possibly fixed)
SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp
SKIP_IMPLICIT_ATOMICS := module.c staticdata.c codegen.cpp
# these need to be annotated (and possibly fixed)
SKIP_GC_CHECK := codegen.cpp rtutils.c

Expand Down
Loading

0 comments on commit faab0de

Please sign in to comment.