Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Intel APX and AVX10 target flags and LLVM attribute setting. #8052

Merged
merged 14 commits into from
Feb 23, 2024
2 changes: 2 additions & 0 deletions python_bindings/src/halide/halide_/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ void define_enums(py::module &m) {
.value("VulkanV12", Target::VulkanV12)
.value("VulkanV13", Target::VulkanV13)
.value("Semihosting", Target::Feature::Semihosting)
.value("AVX10_1", Target::Feature::AVX10_1)
.value("X86APX", Target::Feature::X86APX)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
43 changes: 38 additions & 5 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ namespace {
// existing flags, so that instruction patterns can just check for the
// oldest feature flag that supports an instruction.
Target complete_x86_target(Target t) {
// AVX10.1 turns on older feature flags according to the requested vector
// width: AVX2 from 256 bits upward, and additionally
// AVX512_SapphireRapids from 512 bits upward.
if (t.has_feature(Target::AVX10_1) && t.vector_bits >= 256) {
    t.set_feature(Target::AVX2);
    // A 512-bit (or wider) target necessarily passed the 256-bit test above.
    if (t.vector_bits >= 512) {
        t.set_feature(Target::AVX512_SapphireRapids);
    }
}
if (t.has_feature(Target::AVX512_SapphireRapids)) {
t.set_feature(Target::AVX512_Zen4);
}
Expand All @@ -54,6 +62,7 @@ Target complete_x86_target(Target t) {
if (t.has_feature(Target::AVX)) {
t.set_feature(Target::SSE41);
}

return t;
}

Expand Down Expand Up @@ -1035,9 +1044,31 @@ string CodeGen_X86::mattrs() const {
}
#if LLVM_VERSION >= 180
if (gather_might_be_slow(target)) {
attrs.push_back("+prefer-no-gather");
attrs.emplace_back("+prefer-no-gather");
}
#endif

// Select the AVX10.1 attribute string that matches the target's vector width.
if (target.has_feature(Target::AVX10_1)) {
    const auto avx10_width = target.vector_bits;
    if (avx10_width == 256) {
        attrs.emplace_back("+avx10.1-256");
    } else if (avx10_width == 512) {
        attrs.emplace_back("+avx10.1-512");
    } else {
        // Any other width is reported as a user error.
        user_error << "AVX10 only supports 256 or 512 bit variants at present.\n";
    }
}

// X86APX expands to a fixed list of attributes, appended in the order shown.
if (target.has_feature(Target::X86APX)) {
    static const char *const apx_attrs[] = {"+egpr", "+push2pop2", "+ppx", "+ndd"};
    for (const char *apx_attr : apx_attrs) {
        attrs.emplace_back(apx_attr);
    }
}

return join_strings(attrs, ",");
}

Expand All @@ -1046,10 +1077,12 @@ bool CodeGen_X86::use_soft_float_abi() const {
}

int CodeGen_X86::native_vector_bits() const {
if (target.has_feature(Target::AVX512) ||
target.has_feature(Target::AVX512_Skylake) ||
target.has_feature(Target::AVX512_KNL) ||
target.has_feature(Target::AVX512_Cannonlake)) {
if (target.has_feature(Target::AVX10_1)) {
return target.vector_bits;
} else if (target.has_feature(Target::AVX512) ||
target.has_feature(Target::AVX512_Skylake) ||
target.has_feature(Target::AVX512_KNL) ||
target.has_feature(Target::AVX512_Cannonlake)) {
return 512;
} else if (target.has_feature(Target::AVX) ||
target.has_feature(Target::AVX2)) {
Expand Down
39 changes: 37 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ Target calculate_host_target() {
// Call cpuid with eax=7, ecx=0
int info2[4];
cpuid(info2, 7, 0);
int info3[4];
cpuid(info3, 7, 1);
const uint32_t avx2 = 1U << 5;
const uint32_t avx512f = 1U << 16;
const uint32_t avx512dq = 1U << 17;
Expand Down Expand Up @@ -283,16 +285,47 @@ Target calculate_host_target() {

const uint32_t avxvnni = 1U << 4; // avxvnni (note, not avx512vnni) result in eax
const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1)
int info3[4];
cpuid(info3, 7, 1);
// TODO: port to family/model -based detection.
if ((info3[0] & avxvnni) == avxvnni &&
(info3[0] & avx512bf16) == avx512bf16) {
initial_features.push_back(Target::AVX512_SapphireRapids);
}
}
}

// AVX10 converged vector instructions.
// AVX10 support is enumerated in EDX bit 19 of CPUID leaf 7, subleaf 1
// (held in info3). When that bit is set, CPUID leaf 0x24 is present and
// reports the AVX10 version and the supported vector widths in EBX.
const uint32_t avx10 = 1U << 19;
if (info3[3] & avx10) {
    int info_avx10[4];
    cpuid(info_avx10, 0x24, 0x0);

    // EBX of leaf 0x24; read as unsigned so the bit tests below do not
    // mix signed and unsigned operands.
    const uint32_t avx10_ebx = static_cast<uint32_t>(info_avx10[1]);

    // Bits 0:7 of EBX hold the AVX10 version, which must be >= 1.
    // Only one version exists for now, so this check is not strictly
    // needed, but it follows the documented enumeration sequence.
    if ((avx10_ebx & 0xff) >= 1) {
        initial_features.push_back(Target::AVX10_1);

        const uint32_t avx10_128 = 1U << 16;
        const uint32_t avx10_256 = 1U << 17;
        const uint32_t avx10_512 = 1U << 18;
        // Choose the maximum one that is available.
        if (avx10_ebx & avx10_512) {
            vector_bits = 512;
        } else if (avx10_ebx & avx10_256) {
            vector_bits = 256;
        } else if (avx10_ebx & avx10_128) {  // Not clear it is worth turning on AVX10 for this case.
            vector_bits = 128;
        }
    }
}

// APX register extensions, etc.
const uint32_t apx = 1U << 21;
if (info3[3] & apx) {
initial_features.push_back(Target::X86APX);
}
}

#endif
#endif
#endif
Expand Down Expand Up @@ -556,6 +589,8 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"vk_v12", Target::VulkanV12},
{"vk_v13", Target::VulkanV13},
{"semihosting", Target::Semihosting},
{"avx10_1", Target::AVX10_1},
{"x86apx", Target::X86APX},
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
};

Expand Down
2 changes: 2 additions & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ struct Target {
VulkanV12 = halide_target_feature_vulkan_version12,
VulkanV13 = halide_target_feature_vulkan_version13,
Semihosting = halide_target_feature_semihosting,
AVX10_1 = halide_target_feature_avx10_1,
X86APX = halide_target_feature_x86_apx,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,8 @@ typedef enum halide_target_feature_t {
halide_target_feature_vulkan_version12, ///< Enable Vulkan v1.2 runtime target support.
halide_target_feature_vulkan_version13, ///< Enable Vulkan v1.3 runtime target support.
halide_target_feature_semihosting, ///< Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available.
halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down
2 changes: 2 additions & 0 deletions test/correctness/simd_op_check_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,5 +673,7 @@ int main(int argc, char **argv) {
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake"),
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake-avx512_zen4"),
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake-avx512_zen4-avx512_sapphirerapids"),
// Can be enabled when AVX10 and APX support are stable in LLVM.
// Target("x86-64-linux-avx10_1-vector_bits_256-x86apx"),
});
}
Loading