[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

MacDue · 2024-10-18T14:15:06Z

Follow up to #112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2.

Reference:

llvmbot · 2024-10-18T14:15:44Z

@llvm/pr-subscribers-backend-aarch64

Author: Benjamin Maxwell (MacDue)

Changes

Follow up to #112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2.

Reference:

Full diff: https://github.com/llvm/llvm-project/pull/112905.diff

2 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+7-6)
(modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll (+36-35)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c9f0986b9e349..87f0cd821b1246 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -280,6 +280,10 @@ def HasSMEF16F16orSMEF8F16
 def HasNEONandIsStreamingSafe
     : Predicate<"Subtarget->hasNEON()">,
       AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
+def HasNEONandIsSME2p2StreamingSafe
+    : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+    AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
 def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                  AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
 def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -6270,8 +6274,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
 def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
 def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6281,8 +6284,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
 def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
 def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6305,8 +6307,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
 
 // fp16: integer extraction from vector must be at least 32-bits to be legal.
 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
-// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
-let Predicates = [HasNEON, HasFullFP16] in {
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
 def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
                 (v8i16 FPR128:$Rn), (i64 0))), i16)))),
           (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 9aadf3133ba197..f402463de7be81 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
+; RUN: llc -force-streaming-compatible -mattr=+sme2p2  < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -11,11 +12,11 @@ define double @t1(double %x) {
 ; CHECK-NEXT:    scvtf d0, x8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t1:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzs d0, d0
-; NON-STREAMING-NEXT:    scvtf d0, d0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t1:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs d0, d0
+; USE-NEON-NO-GPRS-NEXT:    scvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi double %x to i64
   %conv1 = sitofp i64 %conv to double
@@ -29,11 +30,11 @@ define float @t2(float %x) {
 ; CHECK-NEXT:    scvtf s0, w8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t2:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzs s0, s0
-; NON-STREAMING-NEXT:    scvtf s0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t2:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi float %x to i32
   %conv1 = sitofp i32 %conv to float
@@ -49,13 +50,13 @@ define half @t3(half %x)  {
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t3:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvt s0, h0
-; NON-STREAMING-NEXT:    fcvtzs s0, s0
-; NON-STREAMING-NEXT:    scvtf s0, s0
-; NON-STREAMING-NEXT:    fcvt h0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t3:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT:    fcvtzs s0, s0
+; USE-NEON-NO-GPRS-NEXT:    scvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptosi half %x to i32
   %conv1 = sitofp i32 %conv to half
@@ -69,11 +70,11 @@ define double @t4(double %x) {
 ; CHECK-NEXT:    ucvtf d0, x8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t4:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzu d0, d0
-; NON-STREAMING-NEXT:    ucvtf d0, d0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t4:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf d0, d0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui double %x to i64
   %conv1 = uitofp i64 %conv to double
@@ -87,11 +88,11 @@ define float @t5(float %x) {
 ; CHECK-NEXT:    ucvtf s0, w8
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t5:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvtzu s0, s0
-; NON-STREAMING-NEXT:    ucvtf s0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t5:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui float %x to i32
   %conv1 = uitofp i32 %conv to float
@@ -107,13 +108,13 @@ define half @t6(half %x)  {
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
 ;
-; NON-STREAMING-LABEL: t6:
-; NON-STREAMING:       // %bb.0: // %entry
-; NON-STREAMING-NEXT:    fcvt s0, h0
-; NON-STREAMING-NEXT:    fcvtzu s0, s0
-; NON-STREAMING-NEXT:    ucvtf s0, s0
-; NON-STREAMING-NEXT:    fcvt h0, s0
-; NON-STREAMING-NEXT:    ret
+; USE-NEON-NO-GPRS-LABEL: t6:
+; USE-NEON-NO-GPRS:       // %bb.0: // %entry
+; USE-NEON-NO-GPRS-NEXT:    fcvt s0, h0
+; USE-NEON-NO-GPRS-NEXT:    fcvtzu s0, s0
+; USE-NEON-NO-GPRS-NEXT:    ucvtf s0, s0
+; USE-NEON-NO-GPRS-NEXT:    fcvt h0, s0
+; USE-NEON-NO-GPRS-NEXT:    ret
 entry:
   %conv = fptoui half %x to i32
   %conv1 = uitofp i32 %conv to half

Follow up to llvm#112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2. Reference: - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--

llvm-ci · 2024-10-24T10:33:30Z

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/7039

Here is the relevant piece of the build log for the reference

Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: Utility/./UtilityTests/7/8 (2032 of 2041)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/1/3 (2033 of 2041)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/2/3 (2034 of 2041)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/1/2 (2035 of 2041)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/0/2 (2036 of 2041)
PASS: lldb-unit :: Host/./HostTests/11/12 (2037 of 2041)
PASS: lldb-unit :: Target/./TargetTests/11/14 (2038 of 2041)
PASS: lldb-unit :: Host/./HostTests/3/12 (2039 of 2041)
PASS: lldb-unit :: Process/gdb-remote/./ProcessGdbRemoteTests/8/9 (2040 of 2041)
UNRESOLVED: lldb-api :: tools/lldb-server/TestLldbGdbServer.py (2041 of 2041)
******************** TEST 'lldb-api :: tools/lldb-server/TestLldbGdbServer.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/make --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/tools/lldb-server -p TestLldbGdbServer.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision cd0373e029fdca7d6d99677b805e728016574a1f)
  clang revision cd0373e029fdca7d6d99677b805e728016574a1f
  llvm revision cd0373e029fdca7d6d99677b805e728016574a1f
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_another_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_minus_one_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_zero_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_attach_commandline_continue_app_exits_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
Program aborted due to an unhandled Error:
Operation not permitted
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server gdbserver --attach=1694338 --reverse-connect [127.0.0.1]:55423
 #0 0x0000aaaaea9f2ec4 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb92ec4)
 #1 0x0000aaaaea9f0ef4 llvm::sys::RunSignalHandlers() (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb90ef4)
 #2 0x0000aaaaea9f35d4 SignalHandler(int) Signals.cpp:0:0
 #3 0x0000ffffa614a7dc (linux-vdso.so.1+0x7dc)
 #4 0x0000ffffa594f200 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76

…112905) Follow up to llvm#112213 now that the +sme2p2 feature flag has landed. The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are allowed in streaming SVE mode with +sme2p2. Reference: - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector--

llvmbot added the backend:AArch64 label Oct 18, 2024

MacDue requested review from sdesmalen-arm and kmclaughlin-arm October 18, 2024 16:03

MacDue force-pushed the sme2p2_fp_converts branch from 8ad1d9e to 5f4770c Compare October 22, 2024 13:42

sdesmalen-arm approved these changes Oct 23, 2024

View reviewed changes

MacDue merged commit cd0373e into llvm:main Oct 24, 2024
8 checks passed

MacDue deleted the sme2p2_fp_converts branch October 24, 2024 10:21

frobtech mentioned this pull request Oct 25, 2024

p/libc fexcept t #113664

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

MacDue commented Oct 18, 2024

llvmbot commented Oct 18, 2024

llvm-ci commented Oct 24, 2024

[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

[AArch64] Allow single-element vector FP converts with +sme2p2 #112905

Conversation

MacDue commented Oct 18, 2024

llvmbot commented Oct 18, 2024

llvm-ci commented Oct 24, 2024