Skip to content

Commit

Permalink
print feature flags used for matching pkgimage (#50172)
Browse files Browse the repository at this point in the history
```
julia> @ccall jl_dump_host_cpu()::Cvoid
CPU: znver2
Features: sse3, pclmul, ssse3, fma, cx16, sse4.1, sse4.2, movbe, popcnt, aes, xsave, avx, f16c, rdrnd, fsgsbase, bmi, avx2, bmi2, rdseed, adx, clflushopt, clwb, sha, rdpid, sahf, lzcnt, sse4a, prfchw, mwaitx, xsaveopt, xsavec, xsaves, clzero, wbnoinvd

julia> target = only(Base.current_image_targets())
znver2; flags=0; features_en=(sse3, pclmul, ssse3, fma, cx16, sse4.1, sse4.2, movbe, popcnt, aes, xsave, avx, f16c, fsgsbase, bmi, avx2, bmi2, adx, clflushopt, clwb, sha, rdpid, sahf, lzcnt, sse4a, prfchw, mwaitx, xsavec, xsaves, clzero, wbnoinvd)
```

Co-authored-by: Prem Chintalapudi <[email protected]>
Co-authored-by: Jameson Nash <[email protected]>
  • Loading branch information
3 people authored Aug 7, 2023
1 parent 8b5e3e9 commit 958da95
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 41 deletions.
99 changes: 91 additions & 8 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2841,11 +2841,9 @@ get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uu
get_compiletime_preferences(::Nothing) = String[]

# Ask the runtime whether the clone targets serialized in `clone_targets` (the same bytes
# that `parse_image_targets` accepts) contain a target this process can use.
#
# Returns `nothing` when a compatible target exists; otherwise returns the rejection
# reason produced by `jl_check_pkgimage_clones`, which callers include in debug output.
function check_clone_targets(clone_targets)
    rejection_reason = ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets)
    if rejection_reason !== nothing
        return rejection_reason
    end
    return nothing
end

Expand Down Expand Up @@ -2877,6 +2875,88 @@ function show(io::IO, cf::CacheFlags)
print(io, ", opt_level = ", cf.opt_level)
end

# One compilation target decoded from an image's serialized clone-target data.
# Instances are built by `parse_image_target` and rendered by `show`.
struct ImageTarget
    name::String                 # target name; printed first by `show`
    flags::Int32                 # raw flags word; printed as `flags=...`
    ext_features::String         # extra feature string; printed after the name when non-empty
    features_en::Vector{UInt8}   # enabled-feature bitset bytes, queried via `test_feature`
    features_dis::Vector{UInt8}  # presumably the disabled-feature bitset; not read here
end

# Decode a single `ImageTarget` from `io`.
#
# The stream is consumed in this order: flags (Int32), feature count (Int32),
# enabled-feature bytes, disabled-feature bytes (4 bytes per counted feature unit
# for each of the two), an Int32-length-prefixed name, and an Int32-length-prefixed
# extra-feature string.
function parse_image_target(io::IO)
    flags = read(io, Int32)
    nwords = read(io, Int32)
    nbytes = 4 * nwords
    enabled = read(io, nbytes)
    disabled = read(io, nbytes)
    # Each string is stored as its byte length followed by the bytes themselves.
    name = String(read(io, read(io, Int32)))
    ext_features = String(read(io, read(io, Int32)))
    return ImageTarget(name, flags, ext_features, enabled, disabled)
end

# Decode every target stored in the serialized clone-target bytes `targets`.
#
# The buffer starts with an Int32 target count, followed by that many records in the
# format understood by `parse_image_target`. Returns a `Vector{ImageTarget}`.
function parse_image_targets(targets::Vector{UInt8})
    stream = IOBuffer(targets)
    n = read(stream, Int32)
    parsed = Vector{ImageTarget}(undef, n)
    for idx in eachindex(parsed)
        parsed[idx] = parse_image_target(stream)
    end
    return parsed
end

# Return the clone targets reported by the runtime (`jl_reflect_clone_targets`),
# decoded into `ImageTarget`s.
function current_image_targets()
    raw = ccall(:jl_reflect_clone_targets, Vector{UInt8}, ())
    return parse_image_targets(raw)
end

# Julia-side view of one entry of the runtime's feature-name table, as handed out by
# `jl_reflect_feature_names`. `feature_names` wraps the C array directly with this
# element type, so the fields must match the C-side struct.
struct FeatureName
    name::Cstring    # feature name as a C string
    bit::UInt32      # bit index into a `uint32_t` array
    llvmver::UInt32  # 0 if it is available on the oldest LLVM version we support
end

# Fetch the runtime's feature-name table as a `Vector{FeatureName}`.
#
# The returned array wraps the memory reported by `jl_reflect_feature_names` without
# taking ownership (`own=false`). When the runtime reports a null table, an empty
# vector is returned instead.
function feature_names()
    table = Ref{Ptr{FeatureName}}()
    len = Ref{Csize_t}()
    @ccall jl_reflect_feature_names(table::Ptr{Ptr{FeatureName}}, len::Ptr{Csize_t})::Cvoid
    if table[] != C_NULL
        return Base.unsafe_wrap(Array, table[], len[], own=false)
    end
    # A null table must come with a zero length.
    @assert len[] == 0
    return FeatureName[]
end

# Report whether the bit identified by `feat.bit` is set in the byte-wise bitset
# `features`. Bit `k` lives in byte `k ÷ 8` (0-based) at position `k % 8`.
function test_feature(features::Vector{UInt8}, feat::FeatureName)
    byte_offset, bit_in_byte = divrem(feat.bit, 8)
    mask = 1 << bit_in_byte
    # `+ 1` converts the 0-based byte offset to Julia's 1-based indexing.
    return (features[byte_offset + 1] & mask) != 0
end

# Print `it` as `name[,ext_features]; flags=N; features_en=(a, b, ...)`, where the
# listed names are the entries of `feature_names()` whose bit is set in `it.features_en`.
function show(io::IO, it::ImageTarget)
    print(io, it.name)
    isempty(it.ext_features) || print(io, ",", it.ext_features)
    print(io, "; flags=", it.flags)
    print(io, "; features_en=(")
    # Lazily yield the enabled feature names, in table order.
    enabled = (Base.unsafe_string(feat.name) for feat in feature_names()
               if test_feature(it.features_en, feat))
    join(io, enabled, ", ")
    print(io, ")")
    # Is feature_dis useful?
    return nothing
end

# Set by FileWatching.__init__()
global mkpidlock_hook
global trymkpidlock_hook
Expand Down Expand Up @@ -2914,7 +2994,6 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300)
f()
end
end

# returns true if "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
# otherwise returns the list of dependencies to also check
@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
Expand Down Expand Up @@ -2948,8 +3027,12 @@ end
@debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage"
return true
end
if !check_clone_targets(clone_targets)
@debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target"
rejection_reasons = check_clone_targets(clone_targets)
if !isnothing(rejection_reasons)
@debug("Rejecting cache file $cachefile for $modkey:",
Reasons=rejection_reasons,
var"Image Targets"=parse_image_targets(clone_targets),
var"Current Targets"=current_image_targets())
return true
end
if !isfile(ocachefile)
Expand Down
79 changes: 64 additions & 15 deletions src/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx)
}

template<typename T>
static inline void unset_bits(T &bits)
static inline void unset_bits(T &bits) JL_NOTSAFEPOINT
{
(void)bits;
}

template<typename T, typename T1, typename... Rest>
static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest)
static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) JL_NOTSAFEPOINT
{
auto bitidx = static_cast<uint32_t>(_bitidx);
auto u32idx = bitidx / 32;
Expand Down Expand Up @@ -142,7 +142,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val)
template<size_t n>
struct FeatureList {
uint32_t eles[n];
uint32_t &operator[](size_t pos)
uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT
{
return eles[pos];
}
Expand Down Expand Up @@ -297,12 +297,6 @@ static inline void append_ext_features(std::vector<std::string> &features,
* Target specific type/constant definitions, always enable.
*/

struct FeatureName {
const char *name;
uint32_t bit; // bit index into a `uint32_t` array;
uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support
};

template<typename CPU, size_t n>
struct CPUSpec {
const char *name;
Expand Down Expand Up @@ -636,7 +630,13 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1);

const void *ids = pointers->target_data;
uint32_t target_idx = callback(ids);
jl_value_t* rejection_reason = nullptr;
JL_GC_PUSH1(&rejection_reason);
uint32_t target_idx = callback(ids, &rejection_reason);
if (target_idx == (uint32_t)-1) {
jl_throw(jl_new_struct(jl_errorexception_type, rejection_reason));
}
JL_GC_POP();

if (pointers->header->version != 1) {
jl_error("Image file is not compatible with this version of Julia");
Expand Down Expand Up @@ -855,17 +855,20 @@ struct SysimgMatch {
// Find the best match in the sysimg.
// Select the best one based on the largest vector register and largest compatible feature set.
template<typename S, typename T, typename F>
static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size)
static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason)
{
SysimgMatch match;
bool match_name = false;
int feature_size = 0;
std::vector<const char *> rejection_reasons;
rejection_reasons.reserve(sysimg.size());
for (uint32_t i = 0; i < sysimg.size(); i++) {
auto &imgt = sysimg[i];
if (!(imgt.en.features & target.dis.features).empty()) {
// Check sysimg enabled features against runtime disabled features
// This is valid (and all what we can do)
// even if one or both of the targets are unknown.
rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n");
continue;
}
if (imgt.name == target.name) {
Expand All @@ -876,25 +879,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v
}
}
else if (match_name) {
rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n");
continue;
}
int new_vsz = max_vector_size(imgt.en.features);
if (match.vreg_size > new_vsz)
if (match.vreg_size > new_vsz) {
rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n");
continue;
}
int new_feature_size = imgt.en.features.nbits();
if (match.vreg_size < new_vsz) {
match.best_idx = i;
match.vreg_size = new_vsz;
feature_size = new_feature_size;
rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n");
continue;
}
if (new_feature_size < feature_size)
if (new_feature_size < feature_size) {
rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n");
continue;
}
match.best_idx = i;
feature_size = new_feature_size;
rejection_reasons.push_back("Updating best match to this target\n");
}
if (match.best_idx == (uint32_t)-1) {
// Construct a nice error message for debugging purposes
std::string error_msg = "Unable to find compatible target in cached code image.\n";
for (size_t i = 0; i < rejection_reasons.size(); i++) {
error_msg += "Target ";
error_msg += std::to_string(i);
error_msg += " (";
error_msg += sysimg[i].name;
error_msg += "): ";
error_msg += rejection_reasons[i];
}
if (rejection_reason)
*rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size());
}
if (match.best_idx == (uint32_t)-1)
jl_error("Unable to find compatible target in system image.");
return match;
}

Expand Down Expand Up @@ -946,3 +968,30 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList<n> &features,
#include "processor_fallback.cpp"

#endif

// Serialize the targets returned by `jl_get_llvm_clone_targets()` into a Julia
// byte array. Layout: a 32-bit target count, then for each target a 32-bit flags
// word (only the JL_TARGET_UNKNOWN_NAME bit is kept) followed by that target's
// raw `data` bytes. 32-bit values are written in host byte order.
extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() {
    const auto targets = jl_get_llvm_clone_targets();
    const uint32_t base_flags = 0;
    std::vector<uint8_t> bytes;
    // Append the in-memory representation of a 32-bit value.
    auto append_u32 = [&bytes] (uint32_t value) {
        const uint8_t *raw = reinterpret_cast<const uint8_t*>(&value);
        bytes.insert(bytes.end(), raw, raw + sizeof(value));
    };
    append_u32(targets.size());
    for (const auto &spec : targets) {
        append_u32(base_flags | (spec.flags & JL_TARGET_UNKNOWN_NAME));
        bytes.insert(bytes.end(), spec.data.begin(), spec.data.end());
    }

    // Copy the staged bytes into a freshly allocated Julia array.
    jl_value_t *result = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, bytes.size());
    uint8_t *dest = (uint8_t*)jl_array_data(result);
    memcpy(dest, bytes.data(), bytes.size());
    return result;
}

// Expose the feature-name table to callers: stores the table pointer in `*fnames`
// and its entry count in `*nf`. Both out-pointers are written unconditionally.
extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf)
{
    *nf = nfeature_names;
    *fnames = feature_names;
}
11 changes: 10 additions & 1 deletion src/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
// Dump the name and feature set of the host CPU
// For debugging only
JL_DLLEXPORT void jl_dump_host_cpu(void);
JL_DLLEXPORT void jl_check_pkgimage_clones(char* data);
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);

JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
Expand Down Expand Up @@ -274,6 +274,15 @@ struct jl_target_spec_t {
extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT;
std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT;
std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT;

// One named CPU feature. Entries of this type are exposed through
// `jl_reflect_feature_names`, so the field order and types are part of that interface.
struct FeatureName {
    const char *name;  // feature name as a C string
    uint32_t bit;      // bit index into a `uint32_t` array
    uint32_t llvmver;  // 0 if it is available on the oldest LLVM version we support
};

extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets();
extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures);
#endif

#endif
21 changes: 14 additions & 7 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,7 @@ static int max_vector_size(const FeatureList<feature_sz> &features)
#endif
}

static uint32_t sysimg_init_cb(const void *id)
static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason)
{
// First see what target is requested for the JIT.
auto &cmdline = get_cmdline_targets();
Expand All @@ -1573,7 +1573,9 @@ static uint32_t sysimg_init_cb(const void *id)
t.name = nname;
}
}
auto match = match_sysimg_targets(sysimg, target, max_vector_size);
auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
if (match.best_idx == -1)
return match.best_idx;
// Now we've decided on which sysimg version to use.
// Make sure the JIT target is compatible with it and save the JIT target.
if (match.vreg_size != max_vector_size(target.en.features) &&
Expand All @@ -1586,7 +1588,7 @@ static uint32_t sysimg_init_cb(const void *id)
return match.best_idx;
}

static uint32_t pkgimg_init_cb(const void *id)
static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT)
{
TargetData<feature_sz> target = jit_targets.front();
auto pkgimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
Expand All @@ -1595,8 +1597,7 @@ static uint32_t pkgimg_init_cb(const void *id)
t.name = nname;
}
}
auto match = match_sysimg_targets(pkgimg, target, max_vector_size);

auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason);
return match.best_idx;
}

Expand Down Expand Up @@ -1823,9 +1824,15 @@ jl_image_t jl_init_processor_pkgimg(void *hdl)
return parse_sysimg(hdl, pkgimg_init_cb);
}

// Check whether the pkgimage target data in `data` contains a target usable by this
// process. Returns `jl_nothing` when `pkgimg_init_cb` finds a match; otherwise returns
// the value `pkgimg_init_cb` stored in `rejection_reason`.
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
{
    jl_value_t *rejection_reason = NULL;
    // Keep the slot rooted while the callback may allocate the reason string.
    JL_GC_PUSH1(&rejection_reason);
    uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason);
    JL_GC_POP();
    // (uint32_t)-1 is the "no compatible target" sentinel.
    if (match_idx == (uint32_t)-1)
        return rejection_reason;
    return jl_nothing;
}

std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
Expand Down
Loading

0 comments on commit 958da95

Please sign in to comment.