[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329

yetingk · 2024-07-31T12:53:28Z

Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors.
This patch also fixes crash for getEVT not serving ptr types.

llvmbot · 2024-07-31T12:54:08Z

@llvm/pr-subscribers-backend-risc-v

Author: Yeting Kuo (yetingk)

Changes

Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors.

Full diff: https://github.com/llvm/llvm-project/pull/101329.diff

3 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp (+2-17)
(modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll (+4-4)
(modified) llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll (+38-3)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5ab..be2e880ecd3a9 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   auto *VTy = cast<VectorType>(II.getType());
 
   IRBuilder<> Builder(&II);
-
-  // Extend VL from i32 to XLen if needed.
-  if (ST->is64Bit())
-    VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  const auto &TLI = *ST->getTargetLowering();
-  Value *Res;
-
-  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
-  if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
-    unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
-                                              : Intrinsic::riscv_vmv_v_x;
-    Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
-                                  {PoisonValue::get(VTy), Val, VL});
-  } else {
-    Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
-  }
+  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+                                       {Val, II.getOperand(2), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46..849f98c26f459 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-NO-OPT-NEXT:    vmv.v.x v8, a0
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd6..e9f092b23b336 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -823,15 +823,15 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
   ret <vscale x 1 x half> %load
 }
 
-define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV64:       # %bb.0:
 ; CHECK-RV64-NEXT:    ld a0, 0(a0)
 ; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
@@ -842,3 +842,38 @@ define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr)
   %w = add <vscale x 1 x i64> %v, %load
   ret <vscale x 1 x i64> %w
 }
+
+define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    csrr a1, vlenb
+; CHECK-RV32-NEXT:    srli a2, a1, 3
+; CHECK-RV32-NEXT:    sub a3, a2, a1
+; CHECK-RV32-NEXT:    sltu a4, a2, a3
+; CHECK-RV32-NEXT:    addi a4, a4, -1
+; CHECK-RV32-NEXT:    and a3, a4, a3
+; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT:    bltu a2, a1, .LBB55_2
+; CHECK-RV32-NEXT:  # %bb.1:
+; CHECK-RV32-NEXT:    mv a2, a1
+; CHECK-RV32-NEXT:  .LBB55_2:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v0, (a0), zero
+; CHECK-RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vadd.vv v16, v16, v24
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    vadd.vx v16, v16, a0
+; CHECK-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
+  %w = add <vscale x 16 x i64> %v, %load
+  ret <vscale x 16 x i64> %w
+}

topperc · 2024-07-31T16:06:50Z

Doesn't this fix a crash if the loaded elements have pointer type? Pointer types are not supported by getEVT or the the riscv_vmv_v_x intrinsic from the original code.

…ments. Previously, llvm IR is hard to create a scalable vector splat with a specific vector lenght, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types.

yetingk · 2024-07-31T19:51:23Z

Doesn't this fix a crash if the loaded elements have pointer type? Pointer types are not supported by getEVT or the the riscv_vmv_v_x intrinsic from the original code.

Add ptr vector test case.

topperc

LGTM

preames

LGTM

topperc · 2024-07-31T21:36:42Z

Does this need to be backported to 19.x? Not sure if we are using vp_strided_load in RISCVGatherScatterOpt there. @preames @lukel97 ?

preames · 2024-07-31T21:54:17Z

Does this need to be backported to 19.x? Not sure if we are using vp_strided_load in RISCVGatherScatterOpt there. @preames @lukel97 ?

It should not be, but @luke957 please confirm.

lukel97 · 2024-08-01T03:13:38Z

It should not be, but @luke957 please confirm.

#98111 isn't in 19.x so I don't think this needs backported

yetingk · 2024-08-01T08:57:35Z

#98579 causes crash when the type of vp.stride is pointer vector type, since pointer types are unexpected for getEVT.

lukel97 · 2024-08-01T09:05:15Z

Oh woops I missed the comments above about the pointer type crash. In that case then I think we should backport this given that #98579 is in. Since this is independent of whether or not RISCVGatherScatterLowering emits vp_strided_load

…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)

…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types.

…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)

This was done in #101329

This was done in llvm#101329

yetingk requested review from lukel97, mshockwave and topperc July 31, 2024 12:53

llvmbot added the backend:RISC-V label Jul 31, 2024

topperc requested a review from preames July 31, 2024 16:47

yetingk force-pushed the vp-stride-splat branch from 2c631bf to 12d3d79 Compare July 31, 2024 19:50

topperc approved these changes Jul 31, 2024

View reviewed changes

preames approved these changes Jul 31, 2024

View reviewed changes

yetingk merged commit 87af9ee into llvm:main Aug 1, 2024
7 checks passed

topperc mentioned this pull request Aug 1, 2024

Backport #101329 to 19.x #101502

Closed

mgabka mentioned this pull request Aug 22, 2024

Add release note about ABI mgabka/llvm-project#6

Open

lukel97 added a commit that referenced this pull request Nov 4, 2024

[RISCV] Remove completed TODO in VP strided load test. NFC

8bc04e4

This was done in #101329

PhilippRados pushed a commit to PhilippRados/llvm-project that referenced this pull request Nov 6, 2024

[RISCV] Remove completed TODO in VP strided load test. NFC

c754ce8

This was done in llvm#101329

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329

[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329

yetingk commented Jul 31, 2024 •

edited

Loading

llvmbot commented Jul 31, 2024

topperc commented Jul 31, 2024 •

edited

Loading

yetingk commented Jul 31, 2024

topperc left a comment

preames left a comment

topperc commented Jul 31, 2024

preames commented Jul 31, 2024

lukel97 commented Aug 1, 2024

yetingk commented Aug 1, 2024

lukel97 commented Aug 1, 2024

[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329

[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329

Conversation

yetingk commented Jul 31, 2024 • edited Loading

llvmbot commented Jul 31, 2024

topperc commented Jul 31, 2024 • edited Loading

yetingk commented Jul 31, 2024

topperc left a comment

Choose a reason for hiding this comment

preames left a comment

Choose a reason for hiding this comment

topperc commented Jul 31, 2024

preames commented Jul 31, 2024

lukel97 commented Aug 1, 2024

yetingk commented Aug 1, 2024

lukel97 commented Aug 1, 2024

yetingk commented Jul 31, 2024 •

edited

Loading

topperc commented Jul 31, 2024 •

edited

Loading