diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c4458b14f36ece..f9dc440b3c4176 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1082,10 +1082,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                          VT, Custom);
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
-        setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT,
+                           Custom);
       else
-        setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom);
-      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+                           EltVT, Custom);
+      setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+                          ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+                          ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+                          ISD::VP_SCATTER},
+                         VT, Custom);
 
       setOperationAction(ISD::FNEG, VT, Expand);
       setOperationAction(ISD::FABS, VT, Expand);
@@ -4449,11 +4456,27 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
   bool HasPassthru = Passthru && !Passthru.isUndef();
   if (!HasPassthru && !Passthru)
     Passthru = DAG.getUNDEF(VT);
-  if (VT.isFloatingPoint())
-    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
 
+  MVT EltVT = VT.getVectorElementType();
   MVT XLenVT = Subtarget.getXLenVT();
 
+  if (VT.isFloatingPoint()) {
+    if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
+        EltVT == MVT::bf16) {
+      if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
+          (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
+        Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
+      else
+        Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
+      MVT IVT = VT.changeVectorElementType(MVT::i16);
+      Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
+      SDValue Splat =
+          lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
+      return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
+    }
+    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
+  }
+
   // Simplest case is that the operand needs to be promoted to XLenVT.
   if (Scalar.getValueType().bitsLE(XLenVT)) {
     // If the operand is a constant, sign extend to increase our chances
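Note on the lowerScalarSplat change above: bf16 splats, and f16 splats when Zvfh is not available, are routed through the integer domain. The scalar is moved to a GPR with FMV_X_ANYEXTH when Zfbfmin/Zfhmin is present (otherwise it is bitcast to i16), splatted as an i16 vector of the same element count, and the result is bitcast back to the floating-point vector type. As a small illustration, the vp-splat.ll test updated later in this patch exercises this path; the IR below is adapted from that test, with the scalable-vector types restored for readability, so treat it as a sketch rather than a verbatim copy:

  define <vscale x 4 x bfloat> @vp_splat_nxv4bf16(bfloat %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
    %splat = call <vscale x 4 x bfloat> @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, <vscale x 4 x i1> %m, i32 %evl)
    ret <vscale x 4 x bfloat> %splat
  }

With the feature sets from that test's RUN lines (+v,+d,+zfh,+zvfbfmin together with either +zvfh or +zvfhmin), this now selects an integer splat instead of vfmv.v.f:

  fmv.x.w a1, fa0
  vsetvli zero, a0, e16, m1, ta, ma
  vmv.v.x v8, a1
  ret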
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
index df1bd889c1042a..9c7ad239bcade3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
@@ -1,6 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x bfloat> @masked_load_nxv1bf16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 1 x bfloat> @llvm.masked.load.nxv1bf16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> undef)
+  ret <vscale x 1 x bfloat> %load
+}
+declare <vscale x 1 x bfloat> @llvm.masked.load.nxv1bf16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x bfloat>)
 
 define <vscale x 1 x half> @masked_load_nxv1f16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv1f16:
@@ -35,6 +48,17 @@ define <vscale x 1 x double> @masked_load_nxv1f64(ptr %a, <vscale x 1 x i1> %mas
 }
 declare <vscale x 1 x double> @llvm.masked.load.nxv1f64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
 
+define <vscale x 2 x bfloat> @masked_load_nxv2bf16(ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16(ptr %a, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %load
+}
+declare <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+
 define <vscale x 2 x half> @masked_load_nxv2f16(ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f16:
 ; CHECK:       # %bb.0:
@@ -68,6 +92,17 @@ define <vscale x 2 x double> @masked_load_nxv2f64(ptr %a, <vscale x 2 x i1> %mas
 }
 declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
 
+define <vscale x 4 x bfloat> @masked_load_nxv4bf16(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x bfloat> @llvm.masked.load.nxv4bf16(ptr %a, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
+  ret <vscale x 4 x bfloat> %load
+}
+declare <vscale x 4 x bfloat> @llvm.masked.load.nxv4bf16(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
+
 define <vscale x 4 x half> @masked_load_nxv4f16(ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4f16:
 ; CHECK:       # %bb.0:
@@ -101,6 +136,17 @@ define <vscale x 4 x double> @masked_load_nxv4f64(ptr %a, <vscale x 4 x i1> %mas
 }
 declare <vscale x 4 x double> @llvm.masked.load.nxv4f64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
 
+define <vscale x 8 x bfloat> @masked_load_nxv8bf16(ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(ptr %a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+  ret <vscale x 8 x bfloat> %load
+}
+declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+
 define <vscale x 8 x half> @masked_load_nxv8f16(ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8f16:
 ; CHECK:       # %bb.0:
@@ -134,6 +180,17 @@ define <vscale x 8 x double> @masked_load_nxv8f64(ptr %a, <vscale x 8 x i1> %mas
 }
 declare <vscale x 8 x double> @llvm.masked.load.nxv8f64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
 
+define <vscale x 16 x bfloat> @masked_load_nxv16bf16(ptr %a, <vscale x 16 x i1> %mask) nounwind {
+; CHECK-LABEL:
masked_load_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv16bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv16bf16(ptr, i32, , ) + define @masked_load_nxv16f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define @masked_load_nxv16f32(ptr %a, %m } declare @llvm.masked.load.nxv16f32(ptr, i32, , ) +define @masked_load_nxv32bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv32bf16(ptr, i32, , ) + define @masked_load_nxv32f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll index 17193aef1dff9e..ddb56e0d979a1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll @@ -1,6 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define void @masked_store_nxv1bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv1bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv1bf16.p0(, ptr, i32, ) define void @masked_store_nxv1f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv1f16: @@ -35,6 +48,17 @@ define void @masked_store_nxv1f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv2bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv2bf16.p0(, ptr, i32, ) + define void @masked_store_nxv2f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv2f16: ; CHECK: # %bb.0: @@ -68,6 +92,17 @@ define void @masked_store_nxv2f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv4bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void 
@llvm.masked.store.nxv4bf16.p0(, ptr, i32, ) + define void @masked_store_nxv4f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv4f16: ; CHECK: # %bb.0: @@ -101,6 +136,17 @@ define void @masked_store_nxv4f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv8bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv8bf16.p0(, ptr, i32, ) + define void @masked_store_nxv8f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv8f16: ; CHECK: # %bb.0: @@ -134,6 +180,17 @@ define void @masked_store_nxv8f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv16bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv16bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv16bf16.p0(, ptr, i32, ) + define void @masked_store_nxv16f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define void @masked_store_nxv16f32( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv32bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv32bf16.p0(, ptr, i32, ) + define void @masked_store_nxv32f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index be37be06f0e779..189ba08dddc7aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare @llvm.masked.gather.nxv1i8.nxv1p0(, i32, , ) @@ -1257,6 +1265,206 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ret void } +declare @llvm.masked.gather.nxv1bf16.nxv1p0(, i32, , ) + +define @mgather_nxv1bf16( %ptrs, %m, %passthru) { +; 
RV32-LABEL: mgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1bf16.nxv1p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2bf16.nxv2p0(, i32, , ) + +define @mgather_nxv2bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2bf16.nxv2p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4bf16.nxv4p0(, i32, , ) + +define @mgather_nxv4bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, splat (i1 1), %passthru) + ret %v +} + +define @mgather_falsemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_falsemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_falsemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8bf16.nxv8p0(, i32, , ) + +define @mgather_nxv8bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; 
RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} declare @llvm.masked.gather.nxv1f16.nxv1p0(, i32, , ) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 9bfa0f31dc3a61..29db67b4b0a41f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare void @llvm.masked.scatter.nxv1i8.nxv1p0(, , i32, ) @@ -967,6 +975,184 @@ define void @mscatter_baseidx_nxv8i64( %val, ptr %base, , , i32, ) + +define void @mscatter_nxv1bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1bf16.nxv1p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(, , i32, ) + +define void @mscatter_nxv2bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2bf16.nxv2p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(, , i32, ) + +define void @mscatter_nxv4bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4bf16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) + ret void +} + +define void @mscatter_falsemask_nxv4bf16( %val, %ptrs) { +; CHECK-LABEL: mscatter_falsemask_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, zeroinitializer) + ret void +} + +declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(, , i32, ) + +define void @mscatter_nxv8bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; 
RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + declare void @llvm.masked.scatter.nxv1f16.nxv1p0(, , i32, ) define void @mscatter_nxv1f16( %val, %ptrs, %m) { diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 14976f21b7dbba..d0a7e54679c81e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,14 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 @@ -352,6 +364,74 @@ define @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride, ret %load } +declare @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv1bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv2bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv4bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv4bf16_unit_stride(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 2, %m, i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv8bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + declare @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, , i32) define @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { @@ -589,10 +669,10 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV32-NEXT: sltu a5, a3, a2 ; CHECK-RV32-NEXT: addi a5, a5, -1 ; CHECK-RV32-NEXT: and a2, a5, a2 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB50_2 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB56_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB50_2: +; CHECK-RV32-NEXT: .LBB56_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -661,10 +741,10 @@ define @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV64-NEXT: sltu a5, a2, a3 ; CHECK-RV64-NEXT: addi a5, a5, -1 ; CHECK-RV64-NEXT: and a3, a5, a3 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB50_2 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB56_2 ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 -; CHECK-RV64-NEXT: .LBB50_2: +; CHECK-RV64-NEXT: .LBB56_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -689,19 +769,19 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr) { ; Test unmasked float zero strided define @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) { -; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16: -; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero -; CHECK-OPT-NEXT: ret -; -; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16: -; CHECK-NO-OPT: # %bb.0: -; CHECK-NO-OPT-NEXT: flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5 -; CHECK-NO-OPT-NEXT: ret %load = call @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 4) ret %load } @@ -854,10 +922,10 @@ define @zero_strided_vadd_nxv16i64( %v, p ; CHECK-RV32-NEXT: and a3, a4, a3 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero -; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2 +; CHECK-RV32-NEXT: bltu a2, a1, 
.LBB61_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a2, a1 -; CHECK-RV32-NEXT: .LBB55_2: +; CHECK-RV32-NEXT: .LBB61_2: ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll index e8704b35f31f7f..abdf9ab09bb9ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV64 @@ -280,6 +286,64 @@ define void @strided_vpstore_nxv8i64( %val, ptr %ptr, i32 sign ret void } +declare void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv1bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv2bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv4bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +define void @strided_vpstore_nxv4bf16_unit_stride( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 2, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv8bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
strided_vpstore_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + declare void @llvm.experimental.vp.strided.store.nxv1f16.p0.i32(, ptr, i32, , i32) define void @strided_vpstore_nxv1f16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { @@ -493,10 +557,10 @@ define void @strided_store_nxv16f64( %v, ptr %ptr, i32 sig ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB41_2 +; CHECK-NEXT: bltu a2, a3, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB41_2: +; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: sub a5, a2, a3 @@ -520,10 +584,10 @@ define void @strided_store_nxv16f64_allones_mask( %v, ptr ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB42_2 +; CHECK-NEXT: bltu a2, a3, .LBB47_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB42_2: +; CHECK-NEXT: .LBB47_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1 ; CHECK-NEXT: sub a3, a2, a3 @@ -549,15 +613,15 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: slli a6, a4, 1 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: bltu a3, a6, .LBB43_2 +; CHECK-NEXT: bltu a3, a6, .LBB48_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: .LBB43_2: +; CHECK-NEXT: .LBB48_2: ; CHECK-NEXT: mv a7, a5 -; CHECK-NEXT: bltu a5, a4, .LBB43_4 +; CHECK-NEXT: bltu a5, a4, .LBB48_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB43_4: +; CHECK-NEXT: .LBB48_4: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr t0, vlenb @@ -585,10 +649,10 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-NEXT: bltu a0, a4, .LBB43_6 +; CHECK-NEXT: bltu a0, a4, .LBB48_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 -; CHECK-NEXT: .LBB43_6: +; CHECK-NEXT: .LBB48_6: ; CHECK-NEXT: mul a3, a5, a2 ; CHECK-NEXT: srli a4, a4, 2 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll index 5fbdefda9f4028..bbe34f4192d638 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfhmin,+zvfbfmin 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_nxv1i8: @@ -270,62 +272,248 @@ define @vp_splat_nxv8i64(i64 %val, %m, i32 ret %splat } -define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f16: +define @vp_splat_nxv1bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) - ret %splat + %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) + ret %splat } -define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f16: +define @vp_splat_nxv2bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) - ret %splat + %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) + ret %splat } -define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f16: +define @vp_splat_nxv4bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) - ret %splat + %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) + ret %splat } -define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f16: +define @vp_splat_nxv8bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) - ret %splat + %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) + ret %splat } -define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16f16: +define @vp_splat_nxv16bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) - ret %splat + %splat = call @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) + ret %splat } -define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32f16: +define @vp_splat_nxv32bf16(bfloat %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv32bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1f16(half %val, %m, 
i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv1f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv1f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv1f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv1f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv4f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv4f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv4f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv4f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv8f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv8f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv8f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv8f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, 
ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv16f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv16f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv16f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv16f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { +; RV32ZVFH-LABEL: vp_splat_nxv32f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vp_splat_nxv32f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vp_splat_nxv32f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vp_splat_nxv32f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) ret %splat } @@ -452,10 +640,10 @@ define @vp_splat_nxv32i32(i32 %val, %m, i ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB39_2 +; CHECK-NEXT: bltu a1, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c0d7ecf74956b9..84c8321b5b9342 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0(, , i32) @@ -1237,6 
+1241,195 @@ define @vpgather_baseidx_nxv8i64(ptr %base, %v } +declare @llvm.vp.gather.nxv1bf16.nxv1p0(, , i32) + +define @vpgather_nxv1bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv1bf16.nxv1p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv2bf16.nxv2p0(, , i32) + +define @vpgather_nxv2bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv2bf16.nxv2p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv4bf16.nxv4p0(, , i32) + +define @vpgather_nxv4bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_truemask_nxv4bf16( %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv8bf16.nxv8p0(, , i32) + +define @vpgather_nxv8bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, 
ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + declare @llvm.vp.gather.nxv1f16.nxv1p0(, , i32) define @vpgather_nxv1f16( %ptrs, %m, i32 zeroext %evl) { @@ -2275,10 +2468,10 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: .LBB113_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2413,10 +2606,10 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 -; 
RV64-NEXT: bltu a1, a2, .LBB104_2 +; RV64-NEXT: bltu a1, a2, .LBB113_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_2: +; RV64-NEXT: .LBB113_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2444,10 +2637,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: bltu a1, a2, .LBB114_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB105_2: +; RV32-NEXT: .LBB114_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2469,10 +2662,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV64-NEXT: bltu a1, a2, .LBB105_2 +; RV64-NEXT: bltu a1, a2, .LBB114_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB105_2: +; RV64-NEXT: .LBB114_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index d4f117fad37ee7..0a98b672fb19c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.load.nxv1i8.p0(ptr, , i32) @@ -269,6 +273,64 @@ define @vpload_nxv8i64(ptr %ptr, %m, i32 ze ret %load } +declare @llvm.vp.load.nxv1bf16.p0(ptr, , i32) + +define @vpload_nxv1bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2bf16.p0(ptr, , i32) + +define @vpload_nxv2bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4bf16.p0(ptr, , i32) + +define @vpload_nxv4bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t 
+; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8bf16.p0(ptr, , i32) + +define @vpload_nxv8bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + declare @llvm.vp.load.nxv1f16.p0(ptr, , i32) define @vpload_nxv1f16(ptr %ptr, %m, i32 zeroext %evl) { @@ -461,10 +523,10 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: bltu a1, a2, .LBB38_2 +; CHECK-NEXT: bltu a1, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -491,10 +553,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, , , , i32) @@ -1106,6 +1110,185 @@ define void @vpscatter_baseidx_nxv8i64( %val, ptr %base, , , , i32) + +define void @vpscatter_nxv1bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv1bf16.nxv1p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(, , , i32) + +define void @vpscatter_nxv2bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv2bf16.nxv2p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(, , , i32) + +define void @vpscatter_nxv4bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_truemask_nxv4bf16( %val, %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(, , , i32) + +define void @vpscatter_nxv8bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv8bf16: +; RV32: # 
%bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, v10, v10 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + 
%ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + declare void @llvm.vp.scatter.nxv1f16.nxv1p0(, , , i32) define void @vpscatter_nxv1f16( %val, %ptrs, %m, i32 zeroext %evl) { @@ -2115,10 +2298,10 @@ define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB99_2 +; RV32-NEXT: bltu a1, a0, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: .LBB99_2: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: sub a2, a1, a0 @@ -2148,10 +2331,10 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: mv a0, a2 -; RV64-NEXT: bltu a2, a1, .LBB99_2 +; RV64-NEXT: bltu a2, a1, .LBB108_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB99_2: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 @@ -2183,10 +2366,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB100_2 +; RV32-NEXT: bltu a2, a1, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB100_2: +; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2223,10 +2406,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB100_2 +; RV64-NEXT: bltu a2, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB100_2: +; RV64-NEXT: .LBB109_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 @@ -2264,10 +2447,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB101_2 +; RV32-NEXT: bltu a2, a1, .LBB110_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB101_2: +; RV32-NEXT: .LBB110_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2304,10 +2487,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB101_2 +; RV64-NEXT: bltu a2, a1, .LBB110_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB101_2: +; RV64-NEXT: .LBB110_2: ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -2346,10 +2529,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB102_2 +; RV32-NEXT: bltu a2, a1, .LBB111_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: .LBB111_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2371,10 +2554,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: 
csrr a1, vlenb
 ; RV64-NEXT: vsll.vi v24, v24, 3
 ; RV64-NEXT: mv a3, a2
-; RV64-NEXT: bltu a2, a1, .LBB102_2
+; RV64-NEXT: bltu a2, a1, .LBB111_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB102_2:
+; RV64-NEXT: .LBB111_2:
 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV64-NEXT: sub a3, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 015d7645aaa29b..d935e52149d207 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -1,7 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8>, ptr, <vscale x 1 x i1>, i32)
@@ -208,6 +212,54 @@ define void @vpstore_nxv8i64(<vscale x 8 x i64> %val, ptr %ptr, <vscale x 8 x i
   ret void
 }
 
+declare void @llvm.vp.store.nxv1bf16.p0(<vscale x 1 x bfloat>, ptr, <vscale x 1 x i1>, i32)
+
+define void @vpstore_nxv1bf16(<vscale x 1 x bfloat> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  call void @llvm.vp.store.nxv1bf16.p0(<vscale x 1 x bfloat> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.vp.store.nxv2bf16.p0(<vscale x 2 x bfloat>, ptr, <vscale x 2 x i1>, i32)
+
+define void @vpstore_nxv2bf16(<vscale x 2 x bfloat> %val, ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  call void @llvm.vp.store.nxv2bf16.p0(<vscale x 2 x bfloat> %val, ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.vp.store.nxv4bf16.p0(<vscale x 4 x bfloat>, ptr, <vscale x 4 x i1>, i32)
+
+define void @vpstore_nxv4bf16(<vscale x 4 x bfloat> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  call void @llvm.vp.store.nxv4bf16.p0(<vscale x 4 x bfloat> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.vp.store.nxv8bf16.p0(<vscale x 8 x bfloat>, ptr, <vscale x 8 x i1>, i32)
+
+define void @vpstore_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpstore_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  call void @llvm.vp.store.nxv8bf16.p0(<vscale x 8 x bfloat> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.store.nxv1f16.p0(<vscale x 1 x half>, ptr, <vscale x 1 x i1>, i32)
 
 define void @vpstore_nxv1f16(<vscale x 1 x half> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -369,10 +421,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr,
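
Illustrative note (not part of the patch): the new bf16 coverage above follows one pattern, so a minimal standalone reproducer in the same style may be useful where the hunks above were damaged in extraction. The sketch below is an assumption-marked example, reusing the zvfhmin/zvfbfmin RUN configuration from these test files; the function names copy_nxv1bf16 and gather_nxv1bf16 and the loosened CHECK lines are placeholders I introduced, not lines from the patch. Since only the element width matters for the memory operations, plain vle16.v/vse16.v and the indexed vluxei64.v are the expected selections on riscv64, matching the checks in the hunks above.

; Illustrative sketch, not from the patch. Hypothetical file in the style of
; llvm/test/CodeGen/RISCV/rvv/, using the RUN line pattern added above.
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr, <vscale x 1 x i1>, i32)
declare void @llvm.vp.store.nxv1bf16.p0(<vscale x 1 x bfloat>, ptr, <vscale x 1 x i1>, i32)
declare <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

; A VP load feeding a VP store of nxv1bf16; both sides should select the
; element-width-only instructions (vle16.v / vse16.v) under e16, mf4.
define void @copy_nxv1bf16(ptr %src, ptr %dst, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: copy_nxv1bf16:
; CHECK: vle16.v
; CHECK: vse16.v
; CHECK: ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr %src, <vscale x 1 x i1> %m, i32 %evl)
  call void @llvm.vp.store.nxv1bf16.p0(<vscale x 1 x bfloat> %v, ptr %dst, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

; A VP gather of nxv1bf16; on riscv64 the pointer vector has SEW=64, so the
; unordered indexed load vluxei64.v is expected, as in the gather checks above.
define <vscale x 1 x bfloat> @gather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: gather_nxv1bf16:
; CHECK: vluxei64.v
; CHECK: ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

In practice the full CHECK bodies would be regenerated with utils/update_llc_test_checks.py, as the NOTE lines in the modified files indicate, rather than written by hand as in this sketch.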