Skip to content

Commit

Permalink
[AArch64] Allow single-element vector FP converts with +sme2p2 (llvm#…
Browse files Browse the repository at this point in the history
  • Loading branch information
MacDue authored and NoumanAmir657 committed Nov 4, 2024
1 parent cc35e49 commit def97f0
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 41 deletions.
13 changes: 7 additions & 6 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ def HasSMEF16F16orSMEF8F16
// Instruction-selection predicate that holds when Subtarget->hasNEON() is
// true; the assembler-side check requires FeatureNEON (labelled "neon").
// NOTE(review): nothing in this definition tests the streaming mode itself;
// presumably it is meant for NEON instructions that stay legal in streaming
// mode -- confirm against the instructions that use it.
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// A subset of NEON instructions is legal in Streaming SVE mode only with
// +sme2p2. This predicate gates the patterns that select those instructions.
// For instruction selection it holds when either:
//   * Subtarget->isNeonAvailable() is true, or
//   * the subtarget has both NEON and SME2p2.
// The assembler-side check only requires FeatureNEON (labelled "neon"); it
// does not additionally test for SME2p2.
def HasNEONandIsSME2p2StreamingSafe
: Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// Holds for instruction selection when Subtarget->hasRCPC() is true; the
// assembler-side check requires FeatureRCPC (labelled "rcpc").
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
Expand Down Expand Up @@ -6315,8 +6319,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
Expand All @@ -6326,8 +6329,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON, HasFullFP16] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
Expand All @@ -6350,8 +6352,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),

// fp16: integer extraction from vector must be at least 32-bits to be legal.
// Actual extraction result is then an in-reg sign-extension of lower 16-bits.
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
let Predicates = [HasNEON, HasFullFP16] in {
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
Expand Down
71 changes: 36 additions & 35 deletions llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS

target triple = "aarch64-unknown-linux-gnu"

Expand All @@ -11,11 +12,11 @@ define double @t1(double %x) {
; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t1:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs d0, d0
; NON-STREAMING-NEXT: scvtf d0, d0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t1:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi double %x to i64
%conv1 = sitofp i64 %conv to double
Expand All @@ -29,11 +30,11 @@ define float @t2(float %x) {
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t2:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzs s0, s0
; NON-STREAMING-NEXT: scvtf s0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t2:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi float %x to i32
%conv1 = sitofp i32 %conv to float
Expand All @@ -49,13 +50,13 @@ define half @t3(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t3:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
; NON-STREAMING-NEXT: fcvtzs s0, s0
; NON-STREAMING-NEXT: scvtf s0, s0
; NON-STREAMING-NEXT: fcvt h0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t3:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptosi half %x to i32
%conv1 = sitofp i32 %conv to half
Expand All @@ -69,11 +70,11 @@ define double @t4(double %x) {
; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t4:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu d0, d0
; NON-STREAMING-NEXT: ucvtf d0, d0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t4:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui double %x to i64
%conv1 = uitofp i64 %conv to double
Expand All @@ -87,11 +88,11 @@ define float @t5(float %x) {
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t5:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvtzu s0, s0
; NON-STREAMING-NEXT: ucvtf s0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t5:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui float %x to i32
%conv1 = uitofp i32 %conv to float
Expand All @@ -107,13 +108,13 @@ define half @t6(half %x) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
;
; NON-STREAMING-LABEL: t6:
; NON-STREAMING: // %bb.0: // %entry
; NON-STREAMING-NEXT: fcvt s0, h0
; NON-STREAMING-NEXT: fcvtzu s0, s0
; NON-STREAMING-NEXT: ucvtf s0, s0
; NON-STREAMING-NEXT: fcvt h0, s0
; NON-STREAMING-NEXT: ret
; USE-NEON-NO-GPRS-LABEL: t6:
; USE-NEON-NO-GPRS: // %bb.0: // %entry
; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
; USE-NEON-NO-GPRS-NEXT: ret
entry:
%conv = fptoui half %x to i32
%conv1 = uitofp i32 %conv to half
Expand Down

0 comments on commit def97f0

Please sign in to comment.