Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

print feature flags used for matching pkgimage #50172

Merged
merged 12 commits into from
Aug 7, 2023
99 changes: 91 additions & 8 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2841,11 +2841,9 @@ get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uu
get_compiletime_preferences(::Nothing) = String[]

# Ask the runtime (`jl_check_pkgimage_clones`) whether the serialized clone targets in
# `clone_targets` can be matched against the current target.
#
# Returns `nothing` when the check passes; otherwise returns the rejection reason handed
# back by the runtime, so that callers can log why the cache file was rejected.
function check_clone_targets(clone_targets)
    # The runtime call yields either `nothing` or the rejection reason, so its result
    # can be forwarded as-is.
    return ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets)
end

Expand Down Expand Up @@ -2877,6 +2875,88 @@ function show(io::IO, cf::CacheFlags)
print(io, ", opt_level = ", cf.opt_level)
end

# One target record decoded from a serialized clone-target blob (see `parse_image_target`).
struct ImageTarget
# Target name; printed first by `show`.
name::String
# 32-bit flags word read from the start of the record.
flags::Int32
# Extra feature string; `show` appends it after the name when it is non-empty.
ext_features::String
# Enabled-feature bitset as raw bytes; `show` tests the bit of each known feature in it.
features_en::Vector{UInt8}
# Disabled-feature bitset as raw bytes, read with the same byte count as `features_en`.
features_dis::Vector{UInt8}
end

# Decode a single target record from `io`. Fields are consumed in this order:
# `Int32` flags, `Int32` feature word count `n`, `4n` bytes of enabled features,
# `4n` bytes of disabled features, an `Int32`-length-prefixed name and an
# `Int32`-length-prefixed extra-feature string.
function parse_image_target(io::IO)
    flag_word = read(io, Int32)
    nwords = read(io, Int32)
    enabled = read(io, 4*nwords)
    disabled = read(io, 4*nwords)
    name_bytes = read(io, read(io, Int32))
    target_name = String(name_bytes)
    ext_bytes = read(io, read(io, Int32))
    extra = String(ext_bytes)
    return ImageTarget(target_name, flag_word, extra, enabled, disabled)
end

# Decode a serialized clone-target blob into a `Vector{ImageTarget}`.
#
# `targets` starts with an `Int32` record count followed by that many records in the
# format understood by `parse_image_target`. Throws an `ArgumentError` if the recorded
# count is negative.
function parse_image_targets(targets::Vector{UInt8})
    io = IOBuffer(targets)
    ntargets = read(io, Int32)
    ntargets >= 0 || throw(ArgumentError("invalid clone target count: $ntargets"))
    # Build the result under its own binding instead of rebinding `targets`, so no
    # variable changes type inside the function. Records are parsed in stream order.
    return ImageTarget[parse_image_target(io) for _ in 1:ntargets]
end

# Decode the clone targets reported by the runtime through `jl_reflect_clone_targets`.
function current_image_targets()
    blob = ccall(:jl_reflect_clone_targets, Vector{UInt8}, ())
    return parse_image_targets(blob)
end

# Julia-side view of one entry of the runtime's feature-name table. Entries are read
# through the pointer filled in by `jl_reflect_feature_names` (see `feature_names`).
# NOTE(review): field order and types presumably have to stay in sync with the C
# `FeatureName` struct in src/processor.h — confirm before changing.
struct FeatureName
# C string holding the feature name; converted with `unsafe_string` when displayed.
name::Cstring
bit::UInt32 # bit index into a `uint32_t` array;
llvmver::UInt32 # 0 if it is available on the oldest LLVM version we support
end

# Return the runtime's feature-name table as a `Vector{FeatureName}`.
# The array wraps the runtime's memory without taking ownership (`own=false`).
# An empty vector is returned when the runtime reports no table.
function feature_names()
    table = Ref{Ptr{FeatureName}}()
    nentries = Ref{Csize_t}()
    @ccall jl_reflect_feature_names(table::Ptr{Ptr{FeatureName}}, nentries::Ptr{Csize_t})::Cvoid
    if table[] != C_NULL
        return Base.unsafe_wrap(Array, table[], nentries[], own=false)
    end
    # A null table must come with a zero count.
    @assert nentries[] == 0
    return Vector{FeatureName}(undef, 0)
end

# Report whether the bit numbered `feat.bit` is set in the byte-wise bitset `features`.
# Bit `b` lives in byte `b ÷ 8` (zero-based) at position `b % 8` within that byte.
function test_feature(features::Vector{UInt8}, feat::FeatureName)
    byte_offset, bit_offset = divrem(feat.bit, 8)
    mask = 1 << bit_offset
    return !iszero(features[byte_offset + 1] & mask)
end

# Print `it` as `name[,ext_features]; flags=<flags>; features_en=(<a>, <b>, ...)`,
# listing the names of all known features whose bit is set in `it.features_en`.
function show(io::IO, it::ImageTarget)
    print(io, it.name)
    isempty(it.ext_features) || print(io, ",", it.ext_features)
    print(io, "; flags=", it.flags)
    print(io, "; features_en=(")
    # Empty before the first printed feature, ", " afterwards.
    separator = ""
    for feat in feature_names()
        test_feature(it.features_en, feat) || continue
        print(io, separator, Base.unsafe_string(feat.name))
        separator = ", "
    end
    print(io, ")")
    # Is feature_dis useful?
    return nothing
end

# Set by FileWatching.__init__()
global mkpidlock_hook
global trymkpidlock_hook
Expand Down Expand Up @@ -2914,7 +2994,6 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300)
f()
end
end

# returns true if "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
# otherwise returns the list of dependencies to also check
@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
Expand Down Expand Up @@ -2948,8 +3027,12 @@ end
@debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage"
return true
end
if !check_clone_targets(clone_targets)
@debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target"
rejection_reasons = check_clone_targets(clone_targets)
if !isnothing(rejection_reasons)
@debug("Rejecting cache file $cachefile for $modkey:",
Reasons=rejection_reasons,
var"Image Targets"=parse_image_targets(clone_targets),
var"Current Targets"=current_image_targets())
return true
end
if !isfile(ocachefile)
Expand Down
79 changes: 64 additions & 15 deletions src/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx)
}

template<typename T>
static inline void unset_bits(T &bits)
static inline void unset_bits(T &bits) JL_NOTSAFEPOINT
{
(void)bits;
}

template<typename T, typename T1, typename... Rest>
static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest)
static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) JL_NOTSAFEPOINT
{
auto bitidx = static_cast<uint32_t>(_bitidx);
auto u32idx = bitidx / 32;
Expand Down Expand Up @@ -142,7 +142,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val)
template<size_t n>
struct FeatureList {
uint32_t eles[n];
uint32_t &operator[](size_t pos)
uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT
{
return eles[pos];
}
Expand Down Expand Up @@ -297,12 +297,6 @@ static inline void append_ext_features(std::vector<std::string> &features,
* Target specific type/constant definitions, always enable.
*/

// Describes one named CPU feature and where its bit lives in a feature bitset.
struct FeatureName {
const char *name; // feature name as a C string
uint32_t bit; // bit index into a `uint32_t` array;
uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support
};

template<typename CPU, size_t n>
struct CPUSpec {
const char *name;
Expand Down Expand Up @@ -636,7 +630,13 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1);

const void *ids = pointers->target_data;
uint32_t target_idx = callback(ids);
jl_value_t* rejection_reason = nullptr;
JL_GC_PUSH1(&rejection_reason);
uint32_t target_idx = callback(ids, &rejection_reason);
if (target_idx == (uint32_t)-1) {
jl_throw(jl_new_struct(jl_errorexception_type, rejection_reason));
}
JL_GC_POP();

if (pointers->header->version != 1) {
jl_error("Image file is not compatible with this version of Julia");
Expand Down Expand Up @@ -855,17 +855,20 @@ struct SysimgMatch {
// Find the best match in the sysimg.
// Select the best one based on the largest vector register and largest compatible feature set.
template<typename S, typename T, typename F>
static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size)
static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason)
{
SysimgMatch match;
bool match_name = false;
int feature_size = 0;
std::vector<const char *> rejection_reasons;
rejection_reasons.reserve(sysimg.size());
for (uint32_t i = 0; i < sysimg.size(); i++) {
auto &imgt = sysimg[i];
if (!(imgt.en.features & target.dis.features).empty()) {
// Check sysimg enabled features against runtime disabled features
// This is valid (and all what we can do)
// even if one or both of the targets are unknown.
rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n");
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat! Could we also print which features were disabled?

continue;
}
if (imgt.name == target.name) {
Expand All @@ -876,25 +879,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v
}
}
else if (match_name) {
rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n");
continue;
}
int new_vsz = max_vector_size(imgt.en.features);
if (match.vreg_size > new_vsz)
if (match.vreg_size > new_vsz) {
rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n");
continue;
}
int new_feature_size = imgt.en.features.nbits();
if (match.vreg_size < new_vsz) {
match.best_idx = i;
match.vreg_size = new_vsz;
feature_size = new_feature_size;
rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n");
continue;
}
if (new_feature_size < feature_size)
if (new_feature_size < feature_size) {
rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n");
continue;
}
match.best_idx = i;
feature_size = new_feature_size;
rejection_reasons.push_back("Updating best match to this target\n");
}
if (match.best_idx == (uint32_t)-1) {
// Construct a nice error message for debugging purposes
std::string error_msg = "Unable to find compatible target in cached code image.\n";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally you would wrap this in a std::stringstream, so that + isn't O(n^2) and you get << syntax (or llvm::raw_string_ostream if you would rather have a generic IO object)

for (size_t i = 0; i < rejection_reasons.size(); i++) {
error_msg += "Target ";
error_msg += std::to_string(i);
error_msg += " (";
error_msg += sysimg[i].name;
error_msg += "): ";
error_msg += rejection_reasons[i];
}
if (rejection_reason)
*rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size());
}
if (match.best_idx == (uint32_t)-1)
jl_error("Unable to find compatible target in system image.");
return match;
}

Expand Down Expand Up @@ -946,3 +968,30 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList<n> &features,
#include "processor_fallback.cpp"

#endif

// Serialize the current LLVM clone targets into a Julia `Vector{UInt8}`.
// Layout: a 32-bit target count, then for each target a 32-bit flags word
// (only the JL_TARGET_UNKNOWN_NAME bit is kept) followed by the target's raw `data` bytes.
extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() {
    auto targets = jl_get_llvm_clone_targets();
    std::vector<uint8_t> blob;
    // Append the in-memory bytes of a 32-bit word to the blob.
    auto append_u32 = [&blob] (uint32_t word) {
        uint8_t bytes[sizeof(word)];
        memcpy(bytes, &word, sizeof(word));
        blob.insert(blob.end(), bytes, bytes + sizeof(word));
    };
    append_u32(targets.size());
    for (auto &spec : targets) {
        append_u32(spec.flags & JL_TARGET_UNKNOWN_NAME);
        blob.insert(blob.end(), spec.data.begin(), spec.data.end());
    }

    // Copy the blob into a freshly allocated Julia byte array.
    jl_value_t *result = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, blob.size());
    uint8_t *dest = (uint8_t*)jl_array_data(result);
    memcpy(dest, blob.data(), blob.size());
    return result;
}

// Expose the feature-name table to callers: stores the table pointer in `*fnames`
// and its entry count in `*nf`.
extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf) {
    *nf = nfeature_names;
    *fnames = feature_names;
}
11 changes: 10 additions & 1 deletion src/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
// Dump the name and feature set of the host CPU
// For debugging only
JL_DLLEXPORT void jl_dump_host_cpu(void);
JL_DLLEXPORT void jl_check_pkgimage_clones(char* data);
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);

JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
Expand Down Expand Up @@ -274,6 +274,15 @@ struct jl_target_spec_t {
extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT;
std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT;
std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT;

// Describes one named CPU feature and where its bit lives in a feature bitset.
// A table of these is handed out by `jl_reflect_feature_names`.
struct FeatureName {
const char *name; // feature name as a C string
uint32_t bit; // bit index into a `uint32_t` array;
uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support
};

extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets();
extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures);
#endif

#endif
21 changes: 14 additions & 7 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,7 @@ static int max_vector_size(const FeatureList<feature_sz> &features)
#endif
}

static uint32_t sysimg_init_cb(const void *id)
static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason)
{
// First see what target is requested for the JIT.
auto &cmdline = get_cmdline_targets();
Expand All @@ -1573,7 +1573,9 @@ static uint32_t sysimg_init_cb(const void *id)
t.name = nname;
}
}
auto match = match_sysimg_targets(sysimg, target, max_vector_size);
auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
if (match.best_idx == -1)
return match.best_idx;
// Now we've decided on which sysimg version to use.
// Make sure the JIT target is compatible with it and save the JIT target.
if (match.vreg_size != max_vector_size(target.en.features) &&
Expand All @@ -1586,7 +1588,7 @@ static uint32_t sysimg_init_cb(const void *id)
return match.best_idx;
}

static uint32_t pkgimg_init_cb(const void *id)
static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT)
{
TargetData<feature_sz> target = jit_targets.front();
auto pkgimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
Expand All @@ -1595,8 +1597,7 @@ static uint32_t pkgimg_init_cb(const void *id)
t.name = nname;
}
}
auto match = match_sysimg_targets(pkgimg, target, max_vector_size);

auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason);
return match.best_idx;
}

Expand Down Expand Up @@ -1823,9 +1824,15 @@ jl_image_t jl_init_processor_pkgimg(void *hdl)
return parse_sysimg(hdl, pkgimg_init_cb);
}

// Check the clone targets serialized in `data` against the current JIT target.
// Returns `jl_nothing` when `pkgimg_init_cb` finds a matching target; otherwise returns
// the rejection reason it stored, so the caller can report why the image was rejected.
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
{
    jl_value_t *rejection_reason = NULL;
    // Root the slot while the callback may allocate the reason string.
    JL_GC_PUSH1(&rejection_reason);
    uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason);
    JL_GC_POP();
    // (uint32_t)-1 is the "no compatible target" sentinel.
    if (match_idx == (uint32_t)-1)
        return rejection_reason;
    return jl_nothing;
}

std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
Expand Down
Loading