Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Actually setup jit targets when compiling packageimages instead of targeting only one #54471

Merged
merged 9 commits into from
Jul 11, 2024
5 changes: 4 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7438,8 +7438,11 @@ static Function* gen_cfun_wrapper(
ctx.builder.ClearInsertionPoint();

if (aliasname) {
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
GlobalValue::ExternalLinkage, aliasname, cw, M);
if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
}
}

if (nest) {
Expand Down
1 change: 1 addition & 0 deletions src/llvm-multiversioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
trampoline->removeFnAttr("julia.mv.reloc");
trampoline->removeFnAttr("julia.mv.clones");
trampoline->addFnAttr("julia.mv.alias");
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
alias->eraseFromParent();

uint32_t id;
Expand Down
52 changes: 49 additions & 3 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1890,12 +1890,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
vchuravy marked this conversation as resolved.
Show resolved Hide resolved
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
if (image_targets.empty())
jl_error("No targets specified");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
auto &features0 = image_targets[t.base].en.features;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
#ifdef _CPU_ARM_
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
static constexpr uint32_t clone_simd[] = {Feature::neon};
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
#endif
}
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1916,6 +1960,8 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
return res;
}

#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
if (feature >= 32 * feature_sz)
Expand Down
23 changes: 19 additions & 4 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<1>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
t.en.flags |= JL_TARGET_CLONE_ALL;
}
if (image_targets.empty())
jl_error("No image targets found");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
jl_target_spec_t ele;
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
ele.data = serialize_target_data(target.name, target.en.features,
Expand All @@ -161,6 +175,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
Expand Down
80 changes: 76 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
return match.best_idx;
}

// This function serves as a fallback during bootstrapping: at that point there is no sysimage with native code,
// so sysimg_init_cb is never called. In every other case this function should not do anything.
static void ensure_jit_target(bool imaging)
{
auto &cmdline = get_cmdline_targets();
Expand Down Expand Up @@ -1102,13 +1104,82 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
{feature_masks, 0}, {{}, 0}, 0});
return res;
}

// This function parses the -C command-line option to determine which targets to multiversion for.
#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}

auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
auto &features0 = image_targets[t.base].en.features;
// Special case for KNL/KNM since they're so different
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
if ((t.name == "knl" || t.name == "knm") &&
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
t.en.flags |= JL_TARGET_CLONE_ALL;
break;
}
}
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
Feature::sse41, Feature::sse42,
Feature::avx, Feature::avx2,
Feature::vaes, Feature::vpclmulqdq,
Feature::sse4a, Feature::avx512f,
Feature::avx512dq, Feature::avx512ifma,
Feature::avx512pf, Feature::avx512er,
Feature::avx512cd, Feature::avx512bw,
Feature::avx512vl, Feature::avx512vbmi,
Feature::avx512vpopcntdq, Feature::avxvnni,
Feature::avx512vbmi2, Feature::avx512vnni,
Feature::avx512bitalg, Feature::avx512bf16,
Feature::avx512vp2intersect, Feature::avx512fp16};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
for (auto fe: clone_bf16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
break;
}
}
}
if (image_targets.empty())
jl_error("No targets specified");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1128,6 +1199,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
Expand Down
13 changes: 13 additions & 0 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1963,6 +1963,19 @@ precompile_test_harness("Test flags") do load_path
@test !Base.isprecompiled(id, ;flags=current_flags)
end

# Multiversioning check: only runs when the `CI` environment flag is set and the host
# architecture is x86_64 or aarch64. It looks up a cache file of the `Test` package
# and checks that its header lists more than one image target.
# This test isn't the most robust because it relies on being in CI,
# but we need better target reflection to make a better one.
if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64)
    @testset "Multiversioning" begin
        pkg = Base.identify_package("Test")
        cachefiles = Base.find_all_in_cache_path(pkg)
        pkgpath = Base.locate_package(pkg)
        # Pick the first cache file for which `stale_cachefile` does not return `true`.
        idx = findfirst(cachefiles) do cf
            Base.stale_cachefile(pkgpath, cf) !== true
        end
        # `findfirst` returns `nothing` when no cache file qualifies; record that as a
        # test failure instead of erroring on `cachefiles[nothing]`.
        @test idx !== nothing
        if idx !== nothing
            # NOTE(review): element 7 of the parsed cache header is presumably the
            # clone-targets blob -- confirm against Base.parse_cache_header.
            targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7])
            @test length(targets) > 1
        end
    end
end

precompile_test_harness("No backedge precompile") do load_path
# Test that the system doesn't accidentally forget to revalidate a method without backedges
write(joinpath(load_path, "NoBackEdges.jl"),
Expand Down