From 0dfe31c3f338b506ab88c33dd3bcad26a24805ed Mon Sep 17 00:00:00 2001
From: Paul Walker
Date: Fri, 2 Aug 2024 14:53:52 +0100
Subject: [PATCH] [LLVM][SME] Allow optional auto-vectorisation for streaming functions.

The command line option enable-scalable-autovec-in-streaming-mode is
used to enable scalable vectors but the same check is missing from
enableScalableVectorization, which is blocking auto-vectorisation.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  8 +++
 .../AArch64/AArch64TargetTransformInfo.h      |  2 +-
 .../AArch64/streaming-vectorization.ll        | 60 +++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 79c0e45e3aa5b5..f0e28ff6c32ac4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2341,6 +2341,14 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
   return std::nullopt;
 }
 
+// Scalable vectorisation is enabled whenever isSVEAvailable() holds.
+// Otherwise, if SVE or streaming SVE is available, it is opt-in via the
+// enable-scalable-autovec-in-streaming-mode command line option.
+bool AArch64TTIImpl::enableScalableVectorization() const {
+  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
+                                  EnableScalableAutovecInStreamingMode);
+}
+
 TypeSize
 AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
   switch (K) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a9189fd53f40bb..4a6457d7a7dbf5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -381,7 +381,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     return ST->isSVEorStreamingSVEAvailable();
   }
 
-  bool enableScalableVectorization() const { return ST->isSVEAvailable(); }
+  bool enableScalableVectorization() const;
 
   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                    ElementCount VF) const;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll
new file mode 100644
index 00000000000000..924d4bfb7836ac
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll
@@ -0,0 +1,60 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOVEC
+; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -enable-scalable-autovec-in-streaming-mode < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,VEC
+
+; Scalable vectorisation must be reported as available for a normal function
+; in both runs, but for a streaming function only when it is requested with
+; -enable-scalable-autovec-in-streaming-mode.
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @normal_function(ptr %a, ptr %b, ptr %c) #0 {
+; CHECK: LV: Checking a loop in 'normal_function'
+; CHECK: LV: Scalable vectorization is available
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @streaming_function(ptr %a, ptr %b, ptr %c) #0 "aarch64_pstate_sm_enabled" {
+; CHECK: LV: Checking a loop in 'streaming_function'
+; VEC: LV: Scalable vectorization is available
+; NOVEC: LV: Scalable vectorization is explicitly disabled
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 1
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+attributes #0 = { vscale_range(1, 16) "target-features"="+sve,+sme" }