Skip to content

Commit

Permalink
Actually setup jit targets when compiling packageimages instead of ta…
Browse files Browse the repository at this point in the history
…rgeting only one (#54471)

Co-authored-by: Gabriel Baraldi <baraldigabriel@gmail.com>
Co-authored-by: Dilum Aluthge <dilum@aluthge.com>
  • Loading branch information
3 people authored and maleadt committed Oct 21, 2024
1 parent 592152c commit 9c8e43b
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 12 deletions.
5 changes: 4 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7260,8 +7260,11 @@ static Function* gen_cfun_wrapper(
ctx.builder.ClearInsertionPoint();

if (aliasname) {
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
GlobalValue::ExternalLinkage, aliasname, cw, M);
if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
}
}

if (nest) {
Expand Down
1 change: 1 addition & 0 deletions src/llvm-multiversioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
trampoline->removeFnAttr("julia.mv.reloc");
trampoline->removeFnAttr("julia.mv.clones");
trampoline->addFnAttr("julia.mv.alias");
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
alias->eraseFromParent();

uint32_t id;
Expand Down
52 changes: 49 additions & 3 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1890,12 +1890,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
if (image_targets.empty())
jl_error("No targets specified");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
auto &features0 = image_targets[t.base].en.features;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
#ifdef _CPU_ARM_
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
static constexpr uint32_t clone_simd[] = {Feature::neon};
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
#endif
}
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1916,6 +1960,8 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
return res;
}

#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
if (feature >= 32 * feature_sz)
Expand Down
23 changes: 19 additions & 4 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
return res;
}

#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");

auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<1>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
t.en.flags |= JL_TARGET_CLONE_ALL;
}
if (image_targets.empty())
jl_error("No image targets found");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
jl_target_spec_t ele;
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
ele.data = serialize_target_data(target.name, target.en.features,
Expand All @@ -161,6 +175,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
Expand Down
80 changes: 76 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,8 @@ static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason)
return match.best_idx;
}

// This function serves as a fallback during bootstrapping: at that point we don't have a sysimage with native code,
// so we won't call sysimg_init_cb. Otherwise, this function shouldn't do anything.
static void ensure_jit_target(bool imaging)
{
auto &cmdline = get_cmdline_targets();
Expand Down Expand Up @@ -1102,13 +1104,82 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
{feature_masks, 0}, {{}, 0}, 0});
return res;
}

// This function parses the -C command-line option to figure out which targets to multiversion for.
#ifndef __clang_gcanalyzer__
llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}

auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
auto &features0 = image_targets[t.base].en.features;
// Special case for KNL/KNM since they're so different
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
if ((t.name == "knl" || t.name == "knm") &&
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
t.en.flags |= JL_TARGET_CLONE_ALL;
break;
}
}
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
Feature::sse41, Feature::sse42,
Feature::avx, Feature::avx2,
Feature::vaes, Feature::vpclmulqdq,
Feature::sse4a, Feature::avx512f,
Feature::avx512dq, Feature::avx512ifma,
Feature::avx512pf, Feature::avx512er,
Feature::avx512cd, Feature::avx512bw,
Feature::avx512vl, Feature::avx512vbmi,
Feature::avx512vpopcntdq, Feature::avxvnni,
Feature::avx512vbmi2, Feature::avx512vnni,
Feature::avx512bitalg, Feature::avx512bf16,
Feature::avx512vp2intersect, Feature::avx512fp16};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
for (auto fe: clone_bf16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
break;
}
}
}
if (image_targets.empty())
jl_error("No targets specified");
llvm::SmallVector<jl_target_spec_t, 0> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1128,6 +1199,7 @@ llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
Expand Down
13 changes: 13 additions & 0 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2124,6 +2124,19 @@ precompile_test_harness("Test flags") do load_path
@test !Base.isprecompiled(id, ;flags=current_flags)
end

# Multiversioning check: only runs when the "CI" environment variable is set to a true value
# and the host architecture is x86_64 or aarch64.
# This test isn't the most robust because it relies on being in CI,
# but we need better target reflection to make a better one.
if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64)
@testset "Multiversioning" begin
pkg = Base.identify_package("Test")
cachefiles = Base.find_all_in_cache_path(pkg)
pkgpath = Base.locate_package(pkg)
# Pick the first cache file for which stale_cachefile does not return `true`.
# NOTE(review): findfirst returns `nothing` when no entry matches, in which case the
# indexing below throws instead of producing a regular test failure.
idx = findfirst(cachefiles) do cf
Base.stale_cachefile(pkgpath, cf) !== true
end
# Element 7 of the parsed cache header is presumably the serialized clone-target data -- TODO confirm.
targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7])
# More than one target is expected (assumes CI builds package images for multiple targets -- confirm).
@test length(targets) > 1
end
end

precompile_test_harness("Issue #52063") do load_path
fname = joinpath(load_path, "i_do_not_exist.jl")
@test try
Expand Down

0 comments on commit 9c8e43b

Please sign in to comment.