From d5ab38f69c1a5c1456bc1a8cc936489d31599f33 Mon Sep 17 00:00:00 2001
From: Jianjian Guan
Date: Thu, 6 Jun 2024 10:33:54 +0800
Subject: [PATCH] [RISCV] Support select/merge like ops for bf16 vectors when
 have Zvfbfmin (#91936)

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  32 +-
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |  15 +-
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td |   8 +-
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |   5 +-
 .../RISCV/rvv/fixed-vectors-select-fp.ll      | 128 +++++++-
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 144 +++++++-
 .../RISCV/rvv/fixed-vectors-vselect-vp.ll     |  56 +++-
 llvm/test/CodeGen/RISCV/rvv/select-fp.ll      | 188 ++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 308 +++++++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll     | 224 ++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll     |  80 ++++-
 11 files changed, 1150 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8ace5d79af079b..4051279fdbf8e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1102,6 +1102,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::EXTRACT_SUBVECTOR},
                            VT, Custom);
         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+        if (Subtarget.hasStdExtZfbfmin()) {
+          if (Subtarget.hasVInstructionsF16())
+            setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+          else if (Subtarget.hasVInstructionsF16Minimal())
+            setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+        }
+        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+                           Custom);
+        setOperationAction(ISD::SELECT_CC, VT, Expand);
         // TODO: Promote to fp32.
       }
     }
@@ -1331,6 +1340,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                               ISD::EXTRACT_SUBVECTOR},
                              VT, Custom);
           setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+          if (Subtarget.hasStdExtZfbfmin()) {
+            if (Subtarget.hasVInstructionsF16())
+              setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+            else if (Subtarget.hasVInstructionsF16Minimal())
+              setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+          }
+          setOperationAction(
+              {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
+              Custom);
           // TODO: Promote to fp32.
           continue;
         }
@@ -6704,10 +6722,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::SPLAT_VECTOR:
-    if (Op.getValueType().getScalarType() == MVT::f16 &&
-        (Subtarget.hasVInstructionsF16Minimal() &&
-         !Subtarget.hasVInstructionsF16())) {
-      if (Op.getValueType() == MVT::nxv32f16)
+    if ((Op.getValueType().getScalarType() == MVT::f16 &&
+         (Subtarget.hasVInstructionsF16Minimal() &&
+          Subtarget.hasStdExtZfhminOrZhinxmin() &&
+          !Subtarget.hasVInstructionsF16())) ||
+        (Op.getValueType().getScalarType() == MVT::bf16 &&
+         (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin() &&
+          Subtarget.hasVInstructionsF16Minimal() &&
+          !Subtarget.hasVInstructionsF16()))) {
+      if (Op.getValueType() == MVT::nxv32f16 ||
+          Op.getValueType() == MVT::nxv32bf16)
        return SplitVectorOp(Op, DAG);
      SDLoc DL(Op);
      SDValue NewScalar =
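To make the intent of the lowering change concrete, here is a minimal IR sketch distilled from the new select-fp.ll tests further down in this patch (the function name and the exact llc invocation are illustrative, not part of the change). With Zvfbfmin but without Zvfh, a scalar-condition select on a bf16 vector now has a lowering: the i1 condition is splatted into a mask register and a vmerge.vvm is emitted, exactly as the CHECK lines below show.

; Compile with flags mirroring the tests, e.g.:
;   llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d
define <vscale x 1 x bfloat> @example_select(i1 zeroext %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
  %v = select i1 %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b
  ret <vscale x 1 x bfloat> %v
}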
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index b0949f5fc1d725..fe4d839e4fdcb8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -382,7 +382,20 @@ class GetIntVTypeInfo<VTypeInfo vti> {
   // Equivalent integer vector type. Eg.
   //   VI8M1 → VI8M1 (identity)
   //   VF64M4 → VI64M4
-  VTypeInfo Vti = !cast<VTypeInfo>(!subst("VF", "VI", !cast<string>(vti)));
+  VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VI",
+                                          !subst("VF", "VI",
+                                                 !cast<string>(vti))));
+}
+
+// This functor is used to obtain the fp vector type that has the same SEW and
+// multiplier as the input parameter type.
+class GetFpVTypeInfo<VTypeInfo vti> {
+  // Equivalent fp vector type. Eg.
+  //   VF16M1 → VF16M1 (identity)
+  //   VBF16M1 → VF16M1
+  VTypeInfo Vti = !cast<VTypeInfo>(!subst("VBF", "VF",
+                                          !subst("VI", "VF",
+                                                 !cast<string>(vti))));
+}
 
 class MTypeInfo<ValueType Mas, LMULInfo M, string Bx> {
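GetFpVTypeInfo exists so a bf16 pattern can borrow the predicates and pseudo of the corresponding f16 type: vfmerge.vfm operates purely on SEW=16 bits, so merging a splatted bf16 scalar can reuse the f16 pseudo of the same LMUL when Zvfh is present. A hedged IR sketch of the case this enables, mirroring the vpmerge-sdnode.ll tests below (the function name is invented for illustration):

declare <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
define <vscale x 1 x bfloat> @example_merge_splat(bfloat %s, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %s, i32 0
  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

Per the test expectations in this patch, with +zvfh this selects a single vfmerge.vfm, while with only +zvfhmin/+zvfbfmin the scalar is splatted at e32 and narrowed back with a masked vfncvtbf16.f.f.w.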
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 3163e4bafd4b0d..497c4aadf7535f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1394,7 +1394,7 @@ defm : VPatFPSetCCSDNode_VV_VF_FV;
 // Floating-point vselects:
 // 11.15. Vector Integer Merge Instructions
 // 13.15. Vector Floating-Point Merge Instruction
-foreach fvti = AllFloatVectors in {
+foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
   defvar ivti = GetIntVTypeInfo<fvti>.Vti;
   let Predicates = GetVTypePredicates<ivti>.Predicates in {
     def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
@@ -1412,7 +1412,9 @@ foreach fvti = AllFloatVectors in {
                fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
   }
 
-  let Predicates = GetVTypePredicates<fvti>.Predicates in
+
+  let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
+                               GetVTypeScalarPredicates<fvti>.Predicates) in
   def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
                                   (SplatFPOp fvti.ScalarRegClass:$rs1),
                                   fvti.RegClass:$rs2)),
@@ -1475,7 +1477,7 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
 //===----------------------------------------------------------------------===//
 
 foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
-  let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+  let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
                                GetVTypeScalarPredicates<fvti>.Predicates) in
   def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
             (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index ce8133a5a297b9..70d8265e7be464 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2604,7 +2604,7 @@ foreach vti = AllFloatVectors in {
   }
 }
 
-foreach fvti = AllFloatVectors in {
+foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
   // Floating-point vselects:
   // 11.15. Vector Integer Merge Instructions
   // 13.15. Vector Floating-Point Merge Instruction
@@ -2639,7 +2639,8 @@ foreach fvti = AllFloatVectors in {
                 GPR:$vl, fvti.Log2SEW)>;
   }
 
-  let Predicates = GetVTypePredicates<fvti>.Predicates in {
+  let Predicates = !listconcat(GetVTypePredicates<GetFpVTypeInfo<fvti>.Vti>.Predicates,
+                               GetVTypeScalarPredicates<fvti>.Predicates) in {
     def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
                                             (SplatFPOp fvti.ScalarRegClass:$rs1),
                                             fvti.RegClass:$rs2,
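Both pattern files gain the same two kinds of rules for bf16: vector-vector vselect/vmerge, which only needs the integer vmerge pseudo (hence GetIntVTypeInfo and no fp predicate), and scalar-splat merge, which needs the fp pseudo via GetFpVTypeInfo plus the scalar Zfbfmin predicate. A small vector-vector sketch in the spirit of the vselect-fp.ll tests below (illustrative only):

define <vscale x 8 x bfloat> @example_vselect(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
  %v = select <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b
  ret <vscale x 8 x bfloat> %v
}

Under any of the RUN configurations in the tests, this should select to a single vmerge.vvm at e16/m2.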
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index d945cf56169812..7a96aad31f0843 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
@@ -343,3 +343,123 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1
   %v = select i1 %cmp, <16 x double> %c, <16 x double> %d
   ret <16 x double> %v
 }
+
+define <2 x bfloat> @select_v2bf16(i1 zeroext %c, <2 x bfloat> %a, <2 x bfloat> %b) {
+; CHECK-LABEL: select_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <2 x bfloat> %a, <2 x bfloat> %b
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @selectcc_v2bf16(bfloat %a, bfloat %b, <2 x bfloat> %c, <2 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <2 x bfloat> %c, <2 x bfloat> %d
+  ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @select_v4bf16(i1 zeroext %c, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: select_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <4 x bfloat> %a, <4 x bfloat> %b
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @selectcc_v4bf16(bfloat %a, bfloat %b, <4 x bfloat> %c, <4 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <4 x bfloat> %c, <4 x bfloat> %d
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @select_v8bf16(i1 zeroext %c, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: select_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <8 x bfloat> %a, <8 x bfloat> %b
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @selectcc_v8bf16(bfloat %a, bfloat %b, <8 x bfloat> %c, <8 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <8 x bfloat> %c, <8 x bfloat> %d
+  ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @select_v16bf16(i1 zeroext %c, <16 x bfloat> %a, <16 x bfloat> %b) {
+; CHECK-LABEL: select_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <16 x bfloat> %a, <16 x bfloat> %b
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @selectcc_v16bf16(bfloat %a, bfloat %b, <16 x bfloat> %c, <16 x bfloat> %d) {
+; CHECK-LABEL: selectcc_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <16 x bfloat> %c, <16 x bfloat> %d
+  ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 9f0561b394b819..d360c3f635b5c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN
 
 declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
@@ -1240,3 +1240,139 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
   %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
   ret <32 x double> %v
 }
+
+declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
+
+define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
+; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
+
+define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
+  ret <4 x bfloat> %v
+}
+declare <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
+
+define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
+
+define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpmerge_vf_v16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
+; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpmerge_vf_v16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
+  ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index 0a2ed3eb1ffbf7..c5d9cdacae749f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
@@ -683,3 +683,51 @@ define <16 x double> @select_v16f64(<16 x i1> %a, <16 x double> %b, <16 x double
   %v = call <16 x double> @llvm.vp.select.v16f64(<16 x i1> %a, <16 x double> %b, <16 x double> %c, i32 %evl)
   ret <16 x double> %v
 }
+
+declare <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)
+
+define <2 x bfloat> @select_v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)
+
+define <4 x bfloat> @select_v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)
+
+define <8 x bfloat> @select_v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)
+
+define <16 x bfloat> @select_v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 %evl)
+  ret <16 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
index f8581d8e21b390..2b9d847a9e873b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
@@ -427,3 +427,183 @@ define <vscale x 8 x double> @selectcc_nxv8f64(double %a, double %b, <vscale x 8
   %v = select i1 %cmp, <vscale x 8 x double> %c, <vscale x 8 x double> %d
   ret <vscale x 8 x double> %v
 }
+
+define <vscale x 1 x bfloat> @select_nxv1bf16(i1 zeroext %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: select_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @selectcc_nxv1bf16(bfloat %a, bfloat %b, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 1 x bfloat> %c, <vscale x 1 x bfloat> %d
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @select_nxv2bf16(i1 zeroext %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: select_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b
+  ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @selectcc_nxv2bf16(bfloat %a, bfloat %b, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 2 x bfloat> %c, <vscale x 2 x bfloat> %d
+  ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @select_nxv4bf16(i1 zeroext %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: select_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @selectcc_nxv4bf16(bfloat %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 4 x bfloat> %c, <vscale x 4 x bfloat> %d
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @select_nxv8bf16(i1 zeroext %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: select_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @selectcc_nxv8bf16(bfloat %a, bfloat %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @select_nxv16bf16(i1 zeroext %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: select_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmsne.vi v0, v16, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b
+  ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @selectcc_nxv16bf16(bfloat %a, bfloat %b, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vmsne.vi v0, v16, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 16 x bfloat> %c, <vscale x 16 x bfloat> %d
+  ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @select_nxv32bf16(i1 zeroext %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: select_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmsne.vi v0, v24, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
+  %v = select i1 %c, <vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b
+  ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @selectcc_nxv32bf16(bfloat %a, bfloat %b, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d) {
+; CHECK-LABEL: selectcc_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT:    feq.s a0, fa4, fa5
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    vmsne.vi v0, v24, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
+  %cmp = fcmp oeq bfloat %a, %b
+  %v = select i1 %cmp, <vscale x 32 x bfloat> %c, <vscale x 32 x bfloat> %d
+  ret <vscale x 32 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 094e6c9cc754fa..e33c795169fab8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
 
 declare <vscale x 1 x i1> @llvm.vp.merge.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -1547,3 +1547,303 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
   %v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
   ret <vscale x 8 x double> %v
 }
+
+declare <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
+
+define <vscale x 1 x bfloat> @vpmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vpmerge_vf_nxv1bf16(bfloat %a, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv1bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv1bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+
+define <vscale x 2 x bfloat> @vpmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
+  ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vpmerge_vf_nxv2bf16(bfloat %a, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv2bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv2bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
+  ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+
+define <vscale x 4 x bfloat> @vpmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vpmerge_vf_nxv4bf16(bfloat %a, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv4bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv4bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
+  ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
+define <vscale x 8 x bfloat> @vpmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vpmerge_vf_nxv8bf16(bfloat %a, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv8bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv8bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
+  ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
+
+define <vscale x 16 x bfloat> @vpmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
+  ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vpmerge_vf_nxv16bf16(bfloat %a, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv16bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv16bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
+  ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
+
+define <vscale x 32 x bfloat> @vpmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
+  ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vpmerge_vf_nxv32bf16(bfloat %a, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32ZVFH-LABEL: vpmerge_vf_nxv32bf16:
+; RV32ZVFH:       # %bb.0:
+; RV32ZVFH-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; RV32ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV32ZVFH-NEXT:    ret
+;
+; RV64ZVFH-LABEL: vpmerge_vf_nxv32bf16:
+; RV64ZVFH:       # %bb.0:
+; RV64ZVFH-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; RV64ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; RV64ZVFH-NEXT:    ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
+; RV32ZVFHMIN:       # %bb.0:
+; RV32ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV32ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV32ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
+; RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RV32ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v16, v24
+; RV32ZVFHMIN-NEXT:    vmv.v.v v20, v16
+; RV32ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; RV32ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV32ZVFHMIN-NEXT:    ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16:
+; RV64ZVFHMIN:       # %bb.0:
+; RV64ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; RV64ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
+; RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RV64ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v16, v24
+; RV64ZVFHMIN-NEXT:    vmv.v.v v20, v16
+; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; RV64ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV64ZVFHMIN-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %a, i32 0
+  %va = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
+  ret <vscale x 32 x bfloat> %v
+}
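One detail worth noting when reading the CHECK lines above: llvm.vp.merge must leave result elements at positions >= %evl equal to the false operand, which is why every vsetvli in this file uses the tu (tail-undisturbed) policy, whereas the plain-select tests in vselect-fp.ll below run with ta. For instance, in

  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)

elements of %vb at indices >= %evl must flow through unchanged, so the vmerge.vvm builds its result on top of %vb and the result is copied back with a whole-register move.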
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 53b8e4a78b756f..82c2fe3273bdfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN
 
 define <vscale x 1 x half> @vfmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %cond) {
@@ -512,3 +512,219 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
   store <vscale x 16 x double> %sel, ptr %out
   ret void
 }
+
+define <vscale x 1 x bfloat> @vfmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb
+  ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfmerge_fv_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv1bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv1bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = select <vscale x 1 x i1> %cond, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %va
+  ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb
+  ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmerge_fv_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv2bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv2bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = select <vscale x 2 x i1> %cond, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va
+  ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb
+  ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmerge_fv_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv4bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv4bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = select <vscale x 4 x i1> %cond, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %va
+  ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+  ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv8bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv8bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va
+  ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmerge_zv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %cond) {
+; CHECK-LABEL: vfmerge_zv_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 8 x i1> %cond, <vscale x 8 x bfloat> splat (bfloat zeroinitializer), <vscale x 8 x bfloat> %va
+  ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vmerge_truelhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vmerge_truelhs_nxv8bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 8 x i1> splat (i1 1), <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+  ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vmerge_falselhs_nxv8bf16_0(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vmerge_falselhs_nxv8bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 8 x i1> zeroinitializer, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb
+  ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb
+  ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmerge_fv_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv16bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv16bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+  %vc = select <vscale x 16 x i1> %cond, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %va
+  ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %cond) {
+; CHECK-LABEL: vfmerge_vv_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
+  %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb
+  ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmerge_fv_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %cond) {
+; CHECK-ZVFH-LABEL: vfmerge_fv_nxv32bf16:
+; CHECK-ZVFH:       # %bb.0:
+; CHECK-ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-ZVFH-NEXT:    ret
+;
+; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv32bf16:
+; CHECK-ZVFHMIN:       # %bb.0:
+; CHECK-ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
+; CHECK-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v16, v24
+; CHECK-ZVFHMIN-NEXT:    vmv.v.v v20, v16
+; CHECK-ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-ZVFHMIN-NEXT:    ret
+  %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+  %vc = select <vscale x 32 x i1> %cond, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va
+  ret <vscale x 32 x bfloat> %vc
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index ee0617c9314801..d1049e14fa29aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -922,3 +922,75 @@ define <vscale x 2 x i1> @select_unknown_T_T(<vscale x 2 x i1> %x, <vscale x 2 x
   %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %y, i32 %evl)
   ret <vscale x 2 x i1> %a
 }
+
+declare <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
+
+define <vscale x 1 x bfloat> @select_nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x bfloat> @llvm.vp.select.nxv1bf16(<vscale x 1 x i1> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+
+define <vscale x 2 x bfloat> @select_nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x bfloat> @llvm.vp.select.nxv2bf16(<vscale x 2 x i1> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 %evl)
+  ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+
+define <vscale x 4 x bfloat> @select_nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x bfloat> @llvm.vp.select.nxv4bf16(<vscale x 4 x i1> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 %evl)
+  ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
+define <vscale x 8 x bfloat> @select_nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x bfloat> @llvm.vp.select.nxv8bf16(<vscale x 8 x i1> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 %evl)
+  ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
+
+define <vscale x 16 x bfloat> @select_nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x bfloat> @llvm.vp.select.nxv16bf16(<vscale x 16 x i1> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 %evl)
+  ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
+
+define <vscale x 32 x bfloat> @select_nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x bfloat> @llvm.vp.select.nxv32bf16(<vscale x 32 x i1> %a, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 %evl)
+  ret <vscale x 32 x bfloat> %v
+}
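As the NOTE header in each test file states, all of the CHECK lines above were produced mechanically. If this patch is rebased and the expected assembly shifts, they can be regenerated with the in-tree tool, for example:

  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll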