-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Adopt updated B16B16 target flags #104602
Conversation
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-backend-aarch64 Author: None (SpencerAbson) ChangesThe enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which:
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the existing 'b16b16'. This was acheived in the below two patches.
Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change. Full diff: https://github.com/llvm/llvm-project/pull/104602.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_
multiclass MinMaxIntr<string i, string zm, string mul, string t> {
def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
- def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+ def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
}
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
}
multiclass SInstMinMaxByVector<string name> {
- def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
- def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+ def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+ def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
- def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
- def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+ def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+ def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
}
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>;
}
+multiclass BfSingleMultiVector<string name> {
+ def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+ def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+ def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+ def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>;
def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x4", [IsStreaming], []>;
+
+ // bfmin, bfmax (single, multi)
+ defm SVBFMIN : BfSingleMultiVector<"min">;
+ defm SVBFMAX : BfSingleMultiVector<"max">;
+
+ // bfminnm, bfmaxnm (single, multi)
+ defm SVBFMINNM : BfSingleMultiVector<"minnm">;
+ defm SVBFMAXNM : BfSingleMultiVector<"maxnm">;
}
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 6b969d50610f8b..cbd76a5062a594 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -55,6 +55,7 @@
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
// CHECK-NEXT: sm4 FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
// CHECK-NEXT: sme FEAT_SME Enable Scalable Matrix Extension (SME)
+// CHECK-NEXT: sme-b16b16 FEAT_SME_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 instructions
// CHECK-NEXT: sme-f16f16 FEAT_SME_F16F16 Enable SME non-widening Float16 instructions
// CHECK-NEXT: sme-f64f64 FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions
// CHECK-NEXT: sme-f8f16 FEAT_SME_F8F16 Enable Scalable Matrix Extension (SME) F8F16 instructions
@@ -71,6 +72,7 @@
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index a1ae0873fc1902..e8ce88a6bd64f0 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -438,6 +438,15 @@ def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1",
def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16",
"Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>;
+// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and
+// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs
+// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19.
+def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16",
+ "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>;
+
+def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
+ "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>;
+
def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
"Enable SME non-widening Float16 instructions", [FeatureSME2]>;
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 3d55b0309d26fd..9d08dd83684c90 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -2005,6 +2005,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
AArch64::AEK_CPA, AArch64::AEK_PAUTHLR,
AArch64::AEK_TLBIW, AArch64::AEK_JSCVT,
AArch64::AEK_FCMA, AArch64::AEK_FP8,
+ AArch64::AEK_SMEB16B16, AArch64::AEK_SVEB16B16,
};
@@ -2043,6 +2044,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
EXPECT_TRUE(llvm::is_contained(Features, "+b16b16"));
EXPECT_TRUE(llvm::is_contained(Features, "+rcpc"));
EXPECT_TRUE(llvm::is_contained(Features, "+rand"));
@@ -2063,6 +2065,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64f64"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme-i16i64"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f16f16"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme2"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1"));
EXPECT_TRUE(llvm::is_contained(Features, "+hbc"));
@@ -2188,6 +2191,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"lse", "nolse", "+lse", "-lse"},
{"rdm", "nordm", "+rdm", "-rdm"},
{"sve", "nosve", "+sve", "-sve"},
+ {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
{"sve2", "nosve2", "+sve2", "-sve2"},
{"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
{"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
@@ -2212,6 +2216,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
{"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"},
{"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"},
+ {"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"},
{"sme2", "nosme2", "+sme2", "-sme2"},
{"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"},
{"hbc", "nohbc", "+hbc", "-hbc"},
@@ -2452,6 +2457,12 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nobf16", "b16b16"}, {"bf16", "b16b16"}, {}},
{AArch64::ARMV8A, {"b16b16", "nobf16"}, {}, {"bf16", "b16b16"}},
+ // b16b16 -> {sve-b16b16, sme-b16b16}
+ {AArch64::ARMV8A, {"nob16b16", "sve-b16b16"}, {"b16b16", "sve-b16b16"}, {}},
+ {AArch64::ARMV8A, {"sve-b16b16", "nob16b16"}, {}, {"sve-b16b16", "b16b16"}},
+ {AArch64::ARMV8A, {"nob16b16", "sme-b16b16"}, {"b16b16", "sme-b16b16"}, {}},
+ {AArch64::ARMV8A, {"sme-b16b16", "nob16b16"}, {}, {"b16b16", "sme-b16b16"}},
+
// sve -> {sve2, f32mm, f64mm}
{AArch64::ARMV8A, {"nosve", "sve2"}, {"sve", "sve2"}, {}},
{AArch64::ARMV8A, {"sve2", "nosve"}, {}, {"sve", "sve2"}},
@@ -2491,7 +2502,7 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"sme-fa64", "nosme"}, {}, {"sme", "sme-fa64"}},
// sme2 -> {sme2p1, ssve-fp8fma, ssve-fp8dot2, ssve-fp8dot4, sme-f8f16,
- // sme-f8f32}
+ // sme-f8f32, sme-b16b16}
{AArch64::ARMV8A, {"nosme2", "sme2p1"}, {"sme2", "sme2p1"}, {}},
{AArch64::ARMV8A, {"sme2p1", "nosme2"}, {}, {"sme2", "sme2p1"}},
{AArch64::ARMV8A,
@@ -2522,6 +2533,8 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"sme-f8f16", "nosme2"}, {}, {"sme2", "sme-f8f16"}},
{AArch64::ARMV8A, {"nosme2", "sme-f8f32"}, {"sme2", "sme-f8f32"}, {}},
{AArch64::ARMV8A, {"sme-f8f32", "nosme2"}, {}, {"sme2", "sme-f8f32"}},
+ {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}},
+ {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}},
// fp8 -> {sme-f8f16, sme-f8f32}
{AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}},
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for making this change! The Clang/driver behaviour looks correct to me now. The patch is a lot smaller than the patches that went into main, so hopefully this can still make it into LLVM 19!
The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which: - FEAT_SVE_B16B16 was weakened - FEAT_SME_B16B16 was introduced New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the existing 'b16b16'. This was acheived in the below two patches. - llvm#101480 - llvm#102501 Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change.
9c1a30b
to
02cafa8
Compare
@SpencerAbson (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which:
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the existing 'b16b16'. This was acheived in the below two patches.
Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features.
The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change.