-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Allow single-element vector FP converts with +sme2p2 #112905
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes: Follow up to #112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2. Reference:
Full diff: https://github.com/llvm/llvm-project/pull/112905.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c9f0986b9e349..87f0cd821b1246 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -280,6 +280,10 @@ def HasSMEF16F16orSMEF8F16
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
+def HasNEONandIsSME2p2StreamingSafe
+ : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+ AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -6270,8 +6274,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6281,8 +6284,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6305,8 +6307,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 9aadf3133ba197..f402463de7be81 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
+; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
target triple = "aarch64-unknown-linux-gnu"
@@ -11,11 +12,11 @@ define double @t1(double %x) {
; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t1:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvtzs d0, d0
-; NON-STREAMING-NEXT: scvtf d0, d0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t1:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
+; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
@@ -29,11 +30,11 @@ define float @t2(float %x) {
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t2:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvtzs s0, s0
-; NON-STREAMING-NEXT: scvtf s0, s0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t2:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
@@ -49,13 +50,13 @@ define half @t3(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t3:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvt s0, h0
-; NON-STREAMING-NEXT: fcvtzs s0, s0
-; NON-STREAMING-NEXT: scvtf s0, s0
-; NON-STREAMING-NEXT: fcvt h0, s0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t3:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
@@ -69,11 +70,11 @@ define double @t4(double %x) {
; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t4:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvtzu d0, d0
-; NON-STREAMING-NEXT: ucvtf d0, d0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t4:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
+; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
@@ -87,11 +88,11 @@ define float @t5(float %x) {
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t5:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvtzu s0, s0
-; NON-STREAMING-NEXT: ucvtf s0, s0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t5:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
@@ -107,13 +108,13 @@ define half @t6(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
-; NON-STREAMING-LABEL: t6:
-; NON-STREAMING: // %bb.0: // %entry
-; NON-STREAMING-NEXT: fcvt s0, h0
-; NON-STREAMING-NEXT: fcvtzu s0, s0
-; NON-STREAMING-NEXT: ucvtf s0, s0
-; NON-STREAMING-NEXT: fcvt h0, s0
-; NON-STREAMING-NEXT: ret
+; USE-NEON-NO-GPRS-LABEL: t6:
+; USE-NEON-NO-GPRS: // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
|
Follow up to llvm#112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2. Reference:
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector--
- https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--
8ad1d9e
to
5f4770c
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/7039 Here is the relevant piece of the build log for reference:
|
…112905) Follow up to llvm#112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2. Reference: - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--
Follow up to #112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2.
Reference: