From e84993bdabbce678fdcb44f8525e4b86f8459401 Mon Sep 17 00:00:00 2001
From: Piyou Chen
Date: Wed, 8 May 2024 04:48:14 -0700
Subject: [PATCH] [RISCV] Support post-regalloc vsetvli pass

---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 19 +-
 llvm/test/CodeGen/RISCV/O0-pipeline.ll | 7 +-
 llvm/test/CodeGen/RISCV/O3-pipeline.ll | 3 +-
 .../early-clobber-tied-def-subreg-liveness.ll | 18 +-
 .../RISCV/intrinsic-cttz-elts-vscale.ll | 26 +-
 llvm/test/CodeGen/RISCV/pr69586.ll | 210 +-
 ...regalloc-last-chance-recoloring-failure.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/abs-vp.ll | 11 +-
 .../RISCV/rvv/access-fixed-objects-by-rvv.ll | 4 +-
 .../CodeGen/RISCV/rvv/active_lane_mask.ll | 84 +-
 llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 234 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 258 +--
 .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 23 +-
 llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 92 +-
 .../RISCV/rvv/combine-store-extract-crash.ll | 24 +-
 llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 53 +-
 .../RISCV/rvv/concat-vector-insert-elt.ll | 10 +-
 .../RISCV/rvv/constant-folding-crash.ll | 18 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 10 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 129 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 182 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 32 +-
 .../rvv/fixed-vector-i8-index-cornercase.ll | 36 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 410 ++--
 .../RISCV/rvv/fixed-vectors-bswap-vp.ll | 234 +-
 .../rvv/fixed-vectors-buildvec-of-binop.ll | 7 +-
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll | 94 +-
 .../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 1150 +++++-----
 .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 523 +++--
 .../RISCV/rvv/fixed-vectors-cttz-vp.ll | 1068 +++++----
 .../rvv/fixed-vectors-deinterleave-load.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-extract-i1.ll | 16 +-
 .../RISCV/rvv/fixed-vectors-extract.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-floor-vp.ll | 94 +-
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 31 +-
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 31 +-
 ...d-vectors-fnearbyint-constrained-sdnode.ll | 24 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 9 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll | 10 +-
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll | 21 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 2 +-
 .../RISCV/rvv/fixed-vectors-fp2i-sat.ll | 130 +-
 .../RISCV/rvv/fixed-vectors-fpext-vp.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-fptosi-vp.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-fptoui-vp.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-fptrunc-vp.ll | 4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 80 +-
 .../rvv/fixed-vectors-insert-subvector.ll | 19 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 6 +-
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 177 +-
 .../rvv/fixed-vectors-int-explodevector.ll | 181 +-
 .../RISCV/rvv/fixed-vectors-int-interleave.ll | 14 +-
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 60 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 192 +-
 ...fixed-vectors-interleaved-access-zve32x.ll | 40 +-
 .../rvv/fixed-vectors-interleaved-access.ll | 903 ++++----
 .../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 82 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 62 +-
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 22 +-
 .../RISCV/rvv/fixed-vectors-mask-splat.ll | 6 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll | 1583 ++++++-------
 .../rvv/fixed-vectors-masked-load-int.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll |
1979 ++++++++--------- .../rvv/fixed-vectors-masked-store-int.ll | 41 +- .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 127 +- .../rvv/fixed-vectors-reduction-formation.ll | 140 +- .../rvv/fixed-vectors-reduction-fp-vp.ll | 8 +- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 20 +- .../rvv/fixed-vectors-reduction-int-vp.ll | 8 +- .../RISCV/rvv/fixed-vectors-reduction-int.ll | 52 +- .../rvv/fixed-vectors-reduction-mask-vp.ll | 58 +- .../RISCV/rvv/fixed-vectors-rint-vp.ll | 104 +- .../RISCV/rvv/fixed-vectors-round-vp.ll | 94 +- .../RISCV/rvv/fixed-vectors-roundeven-vp.ll | 94 +- .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll | 94 +- .../CodeGen/RISCV/rvv/fixed-vectors-sad.ll | 12 +- .../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 55 +- .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 42 +- .../RISCV/rvv/fixed-vectors-sext-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-shuffle-concat.ll | 4 +- .../rvv/fixed-vectors-shuffle-exact-vlen.ll | 3 +- .../rvv/fixed-vectors-shuffle-reverse.ll | 18 +- .../rvv/fixed-vectors-shuffle-transpose.ll | 37 +- .../RISCV/rvv/fixed-vectors-sitofp-vp.ll | 4 +- .../rvv/fixed-vectors-strided-load-combine.ll | 8 +- .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 14 +- .../rvv/fixed-vectors-strided-vpstore.ll | 2 +- .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 156 +- .../RISCV/rvv/fixed-vectors-uitofp-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vand-vp.ll | 14 +- .../RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 29 +- .../RISCV/rvv/fixed-vectors-vfabs-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfma-vp.ll | 70 +- .../RISCV/rvv/fixed-vectors-vfmax-vp.ll | 29 +- .../RISCV/rvv/fixed-vectors-vfmin-vp.ll | 29 +- .../RISCV/rvv/fixed-vectors-vfmuladd-vp.ll | 70 +- .../RISCV/rvv/fixed-vectors-vfneg-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfsqrt-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll | 10 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll | 10 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll | 10 +- .../RISCV/rvv/fixed-vectors-vmax-vp.ll | 12 +- .../RISCV/rvv/fixed-vectors-vmaxu-vp.ll | 12 +- .../RISCV/rvv/fixed-vectors-vmin-vp.ll | 12 +- .../RISCV/rvv/fixed-vectors-vminu-vp.ll | 12 +- .../RISCV/rvv/fixed-vectors-vpgather.ll | 80 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 8 +- .../RISCV/rvv/fixed-vectors-vpmerge.ll | 18 +- .../RISCV/rvv/fixed-vectors-vpscatter.ll | 87 +- .../RISCV/rvv/fixed-vectors-vpstore.ll | 2 +- .../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 22 +- .../RISCV/rvv/fixed-vectors-vsaddu-vp.ll | 22 +- .../RISCV/rvv/fixed-vectors-vscale-range.ll | 22 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 86 +- .../RISCV/rvv/fixed-vectors-vselect.ll | 24 +- .../RISCV/rvv/fixed-vectors-vssub-vp.ll | 22 +- .../RISCV/rvv/fixed-vectors-vssubu-vp.ll | 22 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll | 23 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 23 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 17 +- .../RISCV/rvv/fixed-vectors-vwmulsu.ll | 21 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll | 15 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll | 29 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 29 +- .../RISCV/rvv/fixed-vectors-zext-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 92 +- .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 239 +- .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 239 +- .../rvv/fnearbyint-constrained-sdnode.ll | 30 +- .../CodeGen/RISCV/rvv/fnearbyint-sdnode.ll | 30 +- 
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 132 +- llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 301 ++- llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/masked-tama.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 22 +- .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 57 +- llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 243 +- llvm/test/CodeGen/RISCV/rvv/pr63596.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/rint-vp.ll | 137 +- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 143 +- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 143 +- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 143 +- .../RISCV/rvv/rv32-spill-vector-csr.ll | 2 +- .../RISCV/rvv/rv64-spill-vector-csr.ll | 4 +- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 176 +- llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 54 +- .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 40 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll | 68 +- llvm/test/CodeGen/RISCV/rvv/stepvector.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 24 +- .../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vcpop.ll | 14 +- .../RISCV/rvv/vector-deinterleave-fixed.ll | 4 +- .../RISCV/rvv/vector-deinterleave-load.ll | 50 +- .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 46 +- .../RISCV/rvv/vector-interleave-store.ll | 12 +- .../CodeGen/RISCV/rvv/vector-interleave.ll | 192 +- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfirst.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 64 +- .../RISCV/rvv/vfmadd-constrained-sdnode.ll | 156 +- llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll | 144 +- llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 6 +- .../RISCV/rvv/vfmsub-constrained-sdnode.ll | 149 +- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll | 64 +- llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 4 +- .../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 165 +- .../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 141 +- llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 22 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmfeq.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmfge.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmfgt.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmfle.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmflt.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmfne.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmsbf.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vmseq.ll | 178 +- llvm/test/CodeGen/RISCV/rvv/vmsge.ll | 178 +- 
 llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll | 176 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgt.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsif.ll | 14 +-
 llvm/test/CodeGen/RISCV/rvv/vmsle.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsleu.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmslt.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsltu.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsne.ll | 178 +-
 llvm/test/CodeGen/RISCV/rvv/vmsof.ll | 14 +-
 llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll | 8 +-
 .../rvv/vp-reverse-mask-fixed-vectors.ll | 8 +-
 .../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll | 22 +-
 .../RISCV/rvv/vp-splice-mask-fixed-vectors.ll | 8 +-
 .../RISCV/rvv/vp-splice-mask-vectors.ll | 14 +-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 80 +-
 llvm/test/CodeGen/RISCV/rvv/vpload.ll | 10 +-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 20 +-
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 44 +-
 llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 28 +-
 .../RISCV/rvv/vreductions-fp-sdnode.ll | 8 +-
 .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 12 +-
 .../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 2 +-
 .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll | 76 +-
 .../RISCV/rvv/vrgatherei16-subreg-liveness.ll | 16 +-
 llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 106 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.ll | 2 +-
 .../CodeGen/RISCV/rvv/vsetvli-regression.ll | 5 +-
 llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 48 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 48 +-
 llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 22 +-
 llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll | 2 +-
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 4 +-
 249 files changed, 10385 insertions(+), 9574 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 9f609030ae9871..2918af85e0fa37 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -96,6 +96,10 @@ static cl::opt<bool> EnableMISchedLoadClustering(
     cl::desc("Enable load clustering in the machine scheduler"),
     cl::init(false));
 
+static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
+    "riscv-vsetvli-after-rvv-regalloc", cl::Hidden,
+    cl::desc("vsetvl insertion after rvv regalloc"), cl::init(true));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -389,6 +393,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   addPass(createRVVRegAllocPass(false));
+  if (EnableVSETVLIAfterRVVRegAlloc)
+    addPass(createRISCVInsertVSETVLIPass());
   addPass(createRISCVCoalesceVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
@@ -399,6 +405,8 @@
 bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
   addPass(createRVVRegAllocPass(true));
   addPass(createVirtRegRewriter(false));
+  if (EnableVSETVLIAfterRVVRegAlloc)
+    addPass(createRISCVInsertVSETVLIPass());
   addPass(createRISCVCoalesceVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
@@ -545,10 +553,13 @@ void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVInsertWriteVXRMPass());
   // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
   // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
-  if (TM->getOptLevel() == CodeGenOptLevel::None)
-    insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass());
-  else
-    insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass());
+  if (!EnableVSETVLIAfterRVVRegAlloc) {
+    if (TM->getOptLevel() == CodeGenOptLevel::None)
+      insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass());
+    else
+      insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass());
+  }
+
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 133dda144b927c..e4abc93d1a8a19 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -42,17 +42,14 @@
 ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
 ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT: RISC-V Insert Write VXRM Pass
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Init Undef Pass
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Slot index numbering
-; CHECK-NEXT: Live Interval Analysis
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Two-Address instruction pass
 ; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Slot index numbering
 ; CHECK-NEXT: Live Interval Analysis
+; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index a3b9dfbf745618..0528b00d408b20 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -117,7 +117,6 @@
 ; CHECK-NEXT: RISC-V Merge Base Offset
 ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT: RISC-V Insert Write VXRM Pass
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Detect Dead Lanes
 ; CHECK-NEXT: Init Undef Pass
 ; CHECK-NEXT: Process Implicit Definitions
@@ -129,7 +128,6 @@
 ; CHECK-NEXT: Slot index numbering
 ; CHECK-NEXT: Live Interval Analysis
 ; CHECK-NEXT: Register Coalescer
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Rename Disconnected Subregister Components
 ; CHECK-NEXT: Machine Instruction Scheduler
 ; CHECK-NEXT: Machine Block Frequency Analysis
@@ -143,6 +141,7 @@
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Greedy Register Allocator
 ; CHECK-NEXT: Virtual Register Rewriter
+; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
 ; CHECK-NEXT: RISC-V Dead register definitions
 ; CHECK-NEXT: Virtual Register Map
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 83a4f63add337f..eb6ac985287a10 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -24,36 +24,36 @@ define void @_Z3foov() {
 ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: vsetivli zero, 2, e16, m2,
ta, ma -; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v14, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40) +; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44) @@ -71,12 +71,12 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, 1048572 ; CHECK-NEXT: addi a0, a0, 928 ; CHECK-NEXT: vmsbc.vx v0, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu ; CHECK-NEXT: vsext.vf2 v10, v8, v0.t ; CHECK-NEXT: lui a0, %hi(var_47) ; CHECK-NEXT: addi a0, a0, %lo(var_47) diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index ea8feef3329840..6009a6c7e138ae 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: vid.v v8 +; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vid.v v16 ; RV32-NEXT: li a2, -1 -; RV32-NEXT: vmadd.vx v8, a2, v16 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmadd.vx v16, a2, v8 ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmsne.vi v0, v16, 0 +; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmsne.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vim v16, v16, -1, v0 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a2, v8 ; 
RV32-NEXT: sltu a3, a0, a2 @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV64: # %bb.0: ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vid.v v16 +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vid.v v24 ; RV64-NEXT: li a1, -1 -; RV64-NEXT: vmadd.vx v16, a1, v24 +; RV64-NEXT: vmadd.vx v24, a1, v16 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: sub a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll index 15daf2c5779063..7084c04805be72 100644 --- a/llvm/test/CodeGen/RISCV/pr69586.ll +++ b/llvm/test/CodeGen/RISCV/pr69586.ll @@ -927,258 +927,258 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 -; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: li a2, 11 ; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 -; REMAT-NEXT: vle32.v v16, (a2) +; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 -; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: li a2, 23 ; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 -; REMAT-NEXT: vle32.v v26, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v16 -; REMAT-NEXT: vle32.v v12, (a2) -; REMAT-NEXT: lui a2, 3 -; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: li a2, 25 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: lui a2, 3 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: li a2, 13 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: li a2, 25 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v18, (a2) -; REMAT-NEXT: li a2, 27 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 13 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: li a2, 7 -; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: li a2, 27 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v22, (a2) -; REMAT-NEXT: li a2, 29 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 7 +; REMAT-NEXT: slli a2, a2, 11 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26 -; REMAT-NEXT: li a2, 15 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: li a2, 29 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28 -; REMAT-NEXT: li a2, 31 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 15 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30 -; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: li a2, 31 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6 ; REMAT-NEXT: lui a2, 4 -; REMAT-NEXT: addiw a2, a2, 512 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) ; 
REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4 -; REMAT-NEXT: li a2, 17 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 512 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v2 -; REMAT-NEXT: lui a2, 4 -; REMAT-NEXT: addiw a2, a2, 1536 +; REMAT-NEXT: li a2, 17 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24 -; REMAT-NEXT: li a2, 9 -; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 1536 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26 -; REMAT-NEXT: lui a2, 5 -; REMAT-NEXT: addiw a2, a2, -1536 +; REMAT-NEXT: li a2, 9 +; REMAT-NEXT: slli a2, a2, 11 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28 -; REMAT-NEXT: li a2, 19 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: lui a2, 5 +; REMAT-NEXT: addiw a2, a2, -1536 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30 -; REMAT-NEXT: lui ra, 5 -; REMAT-NEXT: addiw ra, ra, -512 -; REMAT-NEXT: add a2, a0, ra +; REMAT-NEXT: li a2, 19 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v6 -; REMAT-NEXT: lui s11, 5 -; REMAT-NEXT: add a2, a0, s11 +; REMAT-NEXT: lui ra, 5 +; REMAT-NEXT: addiw ra, ra, -512 +; REMAT-NEXT: add a2, a0, ra ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4 -; REMAT-NEXT: lui s10, 5 -; REMAT-NEXT: addiw s10, s10, 512 -; REMAT-NEXT: add a2, a0, s10 +; REMAT-NEXT: lui s11, 5 +; REMAT-NEXT: add a2, a0, s11 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2 -; REMAT-NEXT: li s9, 21 -; REMAT-NEXT: slli s9, s9, 10 -; REMAT-NEXT: add a2, a0, s9 +; REMAT-NEXT: lui s10, 5 +; REMAT-NEXT: addiw s10, s10, 512 +; REMAT-NEXT: add a2, a0, s10 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24 -; REMAT-NEXT: lui s8, 5 -; REMAT-NEXT: addiw s8, s8, 1536 -; REMAT-NEXT: add a2, a0, s8 +; REMAT-NEXT: li s9, 21 +; REMAT-NEXT: slli s9, s9, 10 +; REMAT-NEXT: add a2, a0, s9 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v26 -; REMAT-NEXT: li s7, 11 -; REMAT-NEXT: slli s7, s7, 11 -; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: lui s8, 5 +; REMAT-NEXT: addiw s8, s8, 1536 +; REMAT-NEXT: add a2, a0, s8 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28 -; REMAT-NEXT: lui s6, 6 -; REMAT-NEXT: addiw s6, s6, -1536 -; REMAT-NEXT: add a2, a0, s6 +; REMAT-NEXT: li s7, 11 +; REMAT-NEXT: slli s7, s7, 11 +; REMAT-NEXT: add a2, a0, s7 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30 -; REMAT-NEXT: li s5, 23 -; REMAT-NEXT: slli s5, s5, 10 -; REMAT-NEXT: add a2, a0, s5 +; REMAT-NEXT: lui s6, 6 +; REMAT-NEXT: addiw s6, s6, -1536 +; REMAT-NEXT: add a2, a0, s6 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v6 -; REMAT-NEXT: lui s4, 6 -; REMAT-NEXT: addiw s4, s4, -512 -; REMAT-NEXT: add a2, a0, s4 +; REMAT-NEXT: li 
s5, 23 +; REMAT-NEXT: slli s5, s5, 10 +; REMAT-NEXT: add a2, a0, s5 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4 -; REMAT-NEXT: lui s3, 6 -; REMAT-NEXT: add a2, a0, s3 +; REMAT-NEXT: lui s4, 6 +; REMAT-NEXT: addiw s4, s4, -512 +; REMAT-NEXT: add a2, a0, s4 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2 -; REMAT-NEXT: lui s2, 6 -; REMAT-NEXT: addiw s2, s2, 512 -; REMAT-NEXT: add a2, a0, s2 +; REMAT-NEXT: lui s3, 6 +; REMAT-NEXT: add a2, a0, s3 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24 -; REMAT-NEXT: li s1, 25 -; REMAT-NEXT: slli s1, s1, 10 -; REMAT-NEXT: add a2, a0, s1 +; REMAT-NEXT: lui s2, 6 +; REMAT-NEXT: addiw s2, s2, 512 +; REMAT-NEXT: add a2, a0, s2 ; REMAT-NEXT: vle32.v v0, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26 -; REMAT-NEXT: lui s0, 6 -; REMAT-NEXT: addiw s0, s0, 1536 -; REMAT-NEXT: add a2, a0, s0 +; REMAT-NEXT: li s1, 25 +; REMAT-NEXT: slli s1, s1, 10 +; REMAT-NEXT: add a2, a0, s1 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28 -; REMAT-NEXT: li t6, 13 -; REMAT-NEXT: slli t6, t6, 11 -; REMAT-NEXT: add a2, a0, t6 +; REMAT-NEXT: lui s0, 6 +; REMAT-NEXT: addiw s0, s0, 1536 +; REMAT-NEXT: add a2, a0, s0 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30 -; REMAT-NEXT: lui t5, 7 -; REMAT-NEXT: addiw t5, t5, -1536 -; REMAT-NEXT: add a2, a0, t5 +; REMAT-NEXT: li t6, 13 +; REMAT-NEXT: slli t6, t6, 11 +; REMAT-NEXT: add a2, a0, t6 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v6 -; REMAT-NEXT: li t4, 27 -; REMAT-NEXT: slli t4, t4, 10 -; REMAT-NEXT: add a2, a0, t4 +; REMAT-NEXT: lui t5, 7 +; REMAT-NEXT: addiw t5, t5, -1536 +; REMAT-NEXT: add a2, a0, t5 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4 -; REMAT-NEXT: lui t3, 7 -; REMAT-NEXT: addiw t3, t3, -512 -; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: li t4, 27 +; REMAT-NEXT: slli t4, t4, 10 +; REMAT-NEXT: add a2, a0, t4 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2 +; REMAT-NEXT: lui t3, 7 +; REMAT-NEXT: addiw t3, t3, -512 +; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0 ; REMAT-NEXT: lui t2, 7 ; REMAT-NEXT: add a2, a0, t2 -; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: vle32.v v0, (a2) ; REMAT-NEXT: vle32.v v8, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0 +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26 ; REMAT-NEXT: lui t1, 7 ; REMAT-NEXT: addiw t1, t1, 512 ; REMAT-NEXT: add a2, a0, t1 -; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26 +; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28 ; REMAT-NEXT: li t0, 29 ; REMAT-NEXT: slli t0, t0, 10 ; REMAT-NEXT: add a2, a0, t0 -; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: vle32.v v26, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28 +; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30 ; REMAT-NEXT: lui a7, 7 ; REMAT-NEXT: addiw a7, a7, 1536 ; REMAT-NEXT: add a2, a0, a7 -; REMAT-NEXT: vle32.v v20, (a2) +; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: vle32.v v28, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30 +; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6 ; REMAT-NEXT: li a6, 15 ; REMAT-NEXT: slli 
a6, a6, 11 ; REMAT-NEXT: add a2, a0, a6 -; REMAT-NEXT: vle32.v v22, (a2) +; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v30, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6 +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4 ; REMAT-NEXT: lui a5, 8 ; REMAT-NEXT: addiw a5, a5, -1536 ; REMAT-NEXT: add a2, a0, a5 -; REMAT-NEXT: vle32.v v24, (a2) +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4 +; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2 ; REMAT-NEXT: li a4, 31 ; REMAT-NEXT: slli a4, a4, 10 ; REMAT-NEXT: add a2, a0, a4 -; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: vle32.v v4, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2 +; REMAT-NEXT: sf.vc.vv 3, 0, v14, v0 ; REMAT-NEXT: lui a3, 8 ; REMAT-NEXT: addiw a3, a3, -512 ; REMAT-NEXT: add a2, a0, a3 -; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: lui a2, 8 ; REMAT-NEXT: add a0, a0, a2 ; REMAT-NEXT: vle32.v v0, (a0) -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18 -; REMAT-NEXT: sf.vc.vv 3, 0, v26, v20 -; REMAT-NEXT: sf.vc.vv 3, 0, v28, v22 -; REMAT-NEXT: sf.vc.vv 3, 0, v30, v24 -; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10 -; REMAT-NEXT: sf.vc.vv 3, 0, v4, v12 +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v16 +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20 +; REMAT-NEXT: sf.vc.vv 3, 0, v26, v22 +; REMAT-NEXT: sf.vc.vv 3, 0, v28, v24 +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v10 +; REMAT-NEXT: sf.vc.vv 3, 0, v6, v12 +; REMAT-NEXT: sf.vc.vv 3, 0, v4, v14 ; REMAT-NEXT: sf.vc.vv 3, 0, v2, v0 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; REMAT-NEXT: addi a0, a1, 1024 diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll index 81ef6072449e80..c92ba98dcc3385 100644 --- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -43,7 +43,6 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: call func -; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -55,6 +54,7 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; CHECK-NEXT: vfwsub.wv v8, v24, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, mu ; CHECK-NEXT: vfdiv.vv v8, v24, v8, v0.t @@ -99,7 +99,6 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: call func -; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: csrr a0, vlenb ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0 @@ -111,6 +110,7 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v16 ; SUBREGLIVENESS-NEXT: vsetvli zero, zero, e32, m8, tu, mu ; SUBREGLIVENESS-NEXT: vfdiv.vv v8, v24, v8, v0.t diff --git 
a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index eb74e2d302f1a8..05d6716e471926 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -590,13 +590,12 @@ define @vp_abs_nxv16i64( %va, @access_fixed_and_vector_objects(ptr %val) { ; RV64IV-NEXT: addi a0, sp, 8 ; RV64IV-NEXT: vl1re64.v v8, (a0) ; RV64IV-NEXT: addi a0, sp, 528 -; RV64IV-NEXT: ld a1, 520(sp) ; RV64IV-NEXT: vl1re64.v v9, (a0) -; RV64IV-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64IV-NEXT: ld a0, 520(sp) +; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 139579b3d2a361..9cb3991f31f94d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,9 +103,9 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) { define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 @@ -124,31 +124,30 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 +; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma @@ -174,51 +173,48 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v8, v16, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) -; CHECK-NEXT: vsetivli zero, 16, e64, 
m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v11, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 6 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI10_4) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v11 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v11, v16, a2 ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 2 +; CHECK-NEXT: lui a0, %hi(.LCPI10_4) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vle8.v v13, (a0) +; CHECK-NEXT: vsext.vf8 v16, v12 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v12, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v13 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v13, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 4 +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v12, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v13, 4 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v9, v16, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 879dff4a6e490b..5217148ba4f4ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1449,27 +1449,27 @@ define @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv1i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, 
v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1733,27 +1733,27 @@ define @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv2i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -2017,13 +2017,13 @@ define @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv4i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: 
vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -2311,20 +2311,23 @@ define @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64_unmasked( %va ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -2669,20 +2672,23 @@ define @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64_unmasked( %va ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -3056,13 +3062,13 @@ define @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 800d06c5a78f54..aadd9852af11ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -525,27 +525,27 @@ define @vp_bswap_nxv1i64( %va, @vp_bswap_nxv1i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -695,27 +695,27 @@ define @vp_bswap_nxv2i64( %va, @vp_bswap_nxv2i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; 
RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -865,13 +865,13 @@ define @vp_bswap_nxv4i64( %va, @vp_bswap_nxv4i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1045,20 +1045,23 @@ define @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64_unmasked( %va, i32 ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1288,20 +1291,23 @@ define @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, 
@vp_bswap_nxv8i64_unmasked( %va, i32 ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1539,13 +1545,13 @@ define @vp_bswap_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) @@ -1646,27 +1652,27 @@ define @vp_bswap_nxv1i48( %va, @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a3, a2, a1 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re32.v v0, (a0) -; CHECK-NEXT: vl8re32.v v8, (a1) +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vl8re32.v v0, (a1) +; CHECK-NEXT: vl8re32.v v16, (a3) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v16, (a2) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vv v0, v24, v0 +; CHECK-NEXT: vadd.vv v24, v8, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v24, v8 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v0, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: vadd.vv v24, v0, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v8, v0, v8 +; CHECK-NEXT: vadd.vv v24, v24, v16 ; CHECK-NEXT: vadd.vx v16, v8, a4 ; CHECK-NEXT: vadd.vx v8, v24, a4 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 647d3158b6167f..fa62143546df60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -39,11 +39,11 @@ define @caller_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect( @llvm.vp.ceil.nxv8f16(, @vp_ceil_vv_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -179,16 +179,16 @@ declare @llvm.vp.ceil.nxv16f16(, 
@vp_ceil_vv_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -223,16 +223,16 @@ declare @llvm.vp.ceil.nxv32f16(, @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -359,8 +359,8 @@ define @vp_ceil_vv_nxv4f32( %va, @vp_ceil_vv_nxv8f32( %va, @vp_ceil_vv_nxv16f32( %va, @llvm.vp.ceil.nxv2f64(, @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -569,16 +569,16 @@ declare @llvm.vp.ceil.nxv4f64(, @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -613,16 +613,16 @@ declare @llvm.vp.ceil.nxv7f64(, @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -657,16 +657,16 @@ declare @llvm.vp.ceil.nxv8f64(, @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -705,66 +705,56 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t 
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll index ed434deea1a837..482cf83d540c44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll @@ -10,19 +10,19 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV32-NEXT: th.lwd a2, a3, (a0), 0, 3 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v8, (a2) -; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vzext.vf4 v12, v8 ; RV32-NEXT: vmv.s.x v8, zero -; RV32-NEXT: vredsum.vs v10, v12, v8 -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredsum.vs v9, v12, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: th.swia a0, (a1), 4, 0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vle8.v v10, (a3) +; RV32-NEXT: vle8.v v9, (a3) +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 +; RV32-NEXT: vslideup.vi v9, v10, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 +; RV32-NEXT: vzext.vf4 v12, v9 ; RV32-NEXT: vredsum.vs v8, v12, v8 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a1) @@ -33,19 +33,19 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV64-NEXT: th.ldd a2, a3, (a0), 0, 4 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v8, (a2) -; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vzext.vf4 v12, v8 ; RV64-NEXT: vmv.s.x v8, zero -; RV64-NEXT: vredsum.vs v10, v12, v8 -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vredsum.vs v9, v12, v8 +; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: th.swia a0, (a1), 4, 0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vle8.v v10, (a3) +; RV64-NEXT: vle8.v v9, (a3) +; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v10, v9, 4 +; RV64-NEXT: vslideup.vi v9, v10, 4 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 +; RV64-NEXT: vzext.vf4 v12, v9 ; RV64-NEXT: vredsum.vs v8, v12, v8 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll index 673008d9c0b3d8..52811133c53f3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll @@ -197,28 +197,51 @@ entry: define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) { ; 
RV64-LABEL: test_compresstore_v256i8: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v7, v8 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vle8.v v24, (a1) +; RV64-NEXT: vle8.v v16, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v9, v0, 1 ; RV64-NEXT: vmv.x.s a1, v9 ; RV64-NEXT: vmv.x.s a3, v0 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v16, v0 +; RV64-NEXT: vcompress.vm v16, v24, v0 ; RV64-NEXT: vcpop.m a4, v0 ; RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: vse8.v v16, (a0) +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v24, v7 -; RV64-NEXT: vcpop.m a2, v7 +; RV64-NEXT: vcompress.vm v16, v24, v8 +; RV64-NEXT: vcpop.m a2, v8 ; RV64-NEXT: cpop a3, a3 ; RV64-NEXT: cpop a1, a1 ; RV64-NEXT: add a0, a0, a3 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: vse8.v v16, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; RV32-LABEL: test_compresstore_v256i8: @@ -796,18 +819,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vse64.v v24, (a0) ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vslidedown.vi v8, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v16, v24 +; RV64-NEXT: vcompress.vm v24, v16, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vmv.x.s a1, v0 ; RV64-NEXT: zext.h a1, a1 ; RV64-NEXT: cpopw a1, a1 ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vcpop.m a1, v24 +; RV64-NEXT: vcpop.m a1, v8 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vse64.v v24, (a0) ; RV64-NEXT: ret ; ; RV32-LABEL: test_compresstore_v32i64: @@ -818,18 +841,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vse64.v v24, (a0) ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v8, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vcompress.vm v8, v16, v24 +; RV32-NEXT: vcompress.vm v24, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vmv.x.s a1, v0 ; RV32-NEXT: zext.h a1, a1 ; RV32-NEXT: cpop a1, a1 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: vcpop.m a1, v24 +; RV32-NEXT: vcpop.m a1, v8 ; 
RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: vse64.v v24, (a0) ; RV32-NEXT: ret entry: tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll index bd65ed52be6805..1343b64b876dcb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll @@ -189,16 +189,16 @@ define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) { ; RV32-LABEL: v4xi64_concat_vector_insert_idx3: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vle64.v v10, (a0) ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v9, v8, a2 ; RV32-NEXT: vslide1down.vx v9, v9, a3 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 1 +; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: vslideup.vi v10, v8, 2 +; RV32-NEXT: vse64.v v10, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: v4xi64_concat_vector_insert_idx3: diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index 113154c0f9855b..7839b602706db1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -19,18 +19,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV32-LABEL: constant_folding_crash: ; RV32: # %bb.0: # %entry ; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 -; RV32-NEXT: vmsne.vi v0, v11, 0 +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vmsne.vi v10, v10, 0 +; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmv1r.v v0, v11 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vrgather.vi v9, v8, 0 ; RV32-NEXT: vmsne.vi v0, v9, 0 @@ -42,18 +43,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV64-LABEL: constant_folding_crash: ; RV64: # %bb.0: # %entry ; RV64-NEXT: ld a0, 8(a0) -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vmv.v.x v13, a0 -; RV64-NEXT: vmsne.vi v0, v13, 0 +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vmsne.vi v12, v12, 0 +; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: vrgather.vi v9, v8, 0 ; RV64-NEXT: vmsne.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index 41ec102c34efb7..6e538f3dfb38ee 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -3341,16 +3341,16 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; ; RV32F-LABEL: ctlz_zero_undef_nxv8i64: ; RV32F: # %bb.0: -; RV32F-NEXT: vmv8r.v v16, v8 ; RV32F-NEXT: li a0, 190 ; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32F-NEXT: vmv.v.x v8, a0 +; RV32F-NEXT: vmv.v.x v16, a0 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v24, v16 -; RV32F-NEXT: vsrl.vi v16, v24, 23 -; RV32F-NEXT: vwsubu.wv v8, v8, v16 +; RV32F-NEXT: vfncvt.f.xu.w v24, v8 +; RV32F-NEXT: vsrl.vi v8, v24, 23 +; RV32F-NEXT: vwsubu.wv v16, v16, v8 ; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: vmv8r.v v8, v16 ; RV32F-NEXT: ret ; ; RV64F-LABEL: ctlz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 86086f5dc88f76..fff280c005b542 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -1259,8 +1259,8 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_nxv16i64( %va, @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -2487,8 +2487,8 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t @@ -2512,8 +2512,8 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: # %bb.1: ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 883f68aec1f422..e3c53212e91b77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2024,8 +2024,7 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @llvm.vp.ctpop.nxv16i64( %va, %m, i32 %evl) @@ -2375,13 +2378,13 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v0, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v24, v16, v24 ; RV32-NEXT: lui a3, 209715 @@ -2404,20 +2407,20 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a3, a3, 257 ; 
RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v16, v16, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index ef8a6c704a44b3..d7fcb94bbf4974 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -2282,7 +2282,6 @@ define @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @llvm.vp.cttz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -2628,98 +2626,97 @@ define @vp_cttz_nxv16i64_unmasked( %va, i ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a2 ; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vand.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v16, v16, v8 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vsub.vv v16, v16, v24 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v16, v8 +; RV32-NEXT: vmul.vv 
v8, v8, v16 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v8, v16, a3 +; RV32-NEXT: vsrl.vx v8, v8, a3 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB47_2: +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v16, v24, a2 +; RV32-NEXT: vsub.vx v8, v24, a2 ; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a3 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4038,13 +4035,12 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t ; CHECK-NEXT: fsrmi a0, 1 @@ -4077,8 +4073,8 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: # %bb.1: ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl) diff 
--git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index e69b4789a09afe..43bb8059f83525 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -142,22 +142,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: addi a3, sp, 64 ; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a4, a0, a2 -; RV32-NEXT: vl8r.v v16, (a4) +; RV32-NEXT: add a3, a0, a2 +; RV32-NEXT: vl8r.v v16, (a3) ; RV32-NEXT: vl8r.v v24, (a0) -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: add a1, a0, a1 +; RV32-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; RV32-NEXT: vmseq.vi v8, v16, 0 ; RV32-NEXT: vmseq.vi v0, v24, 0 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32-NEXT: vs8r.v v24, (a3) -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: vs8r.v v24, (a0) +; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 -; RV32-NEXT: vs8r.v v8, (a2) +; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload @@ -182,22 +182,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: addi a3, sp, 64 ; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a4, a0, a2 -; RV64-NEXT: vl8r.v v16, (a4) +; RV64-NEXT: add a3, a0, a2 +; RV64-NEXT: vl8r.v v16, (a3) ; RV64-NEXT: vl8r.v v24, (a0) -; RV64-NEXT: add a1, a3, a1 -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: add a1, a0, a1 +; RV64-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; RV64-NEXT: vmseq.vi v8, v16, 0 ; RV64-NEXT: vmseq.vi v0, v24, 0 ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64-NEXT: vs8r.v v24, (a3) -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: vs8r.v v24, (a0) +; RV64-NEXT: add a0, a0, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 -; RV64-NEXT: vs8r.v v8, (a2) +; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 875f4f239028b4..6b8d778bc3242f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -35,10 +35,10 @@ define <512 x i8> @single_source(<512 x i8> %a) { ; CHECK-NEXT: vslidedown.vi v16, v16, 4 ; CHECK-NEXT: li a0, 466 ; CHECK-NEXT: li a1, 465 +; CHECK-NEXT: lbu a2, 1012(sp) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma -; CHECK-NEXT: lbu a0, 1012(sp) ; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv.s.x v16, a2 ; CHECK-NEXT: li a0, 501 ; CHECK-NEXT: li a1, 500 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma @@ -118,16 +118,16 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: vslidedown.vi v24, v24, 4 ; CHECK-NEXT: li a1, 466 ; CHECK-NEXT: li a2, 465 +; CHECK-NEXT: lbu a3, 985(sp) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: lbu a1, 985(sp) ; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: vmv.s.x v24, a3 ; CHECK-NEXT: li a1, 478 ; CHECK-NEXT: 
li a2, 477 +; CHECK-NEXT: lbu a3, 1012(sp) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: lbu a1, 1012(sp) ; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: vmv.s.x v24, a3 ; CHECK-NEXT: li a1, 501 ; CHECK-NEXT: li a2, 500 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma @@ -137,21 +137,21 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: addi a1, a1, 501 ; CHECK-NEXT: slli a1, a1, 13 ; CHECK-NEXT: addi a1, a1, 512 +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: lui a2, 1047552 +; CHECK-NEXT: addiw a2, a2, 1 +; CHECK-NEXT: slli a2, a2, 23 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: slli a2, a2, 18 +; CHECK-NEXT: vslide1down.vx v0, v24, a2 +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: vmv.s.x v24, a2 ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma +; CHECK-NEXT: vslideup.vi v0, v24, 6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v7, 0 -; CHECK-NEXT: lui a1, 1047552 -; CHECK-NEXT: addiw a1, a1, 1 -; CHECK-NEXT: slli a1, a1, 23 -; CHECK-NEXT: addi a1, a1, 1 -; CHECK-NEXT: slli a1, a1, 18 -; CHECK-NEXT: vslide1down.vx v0, v7, a1 -; CHECK-NEXT: lui a1, 4 -; CHECK-NEXT: vmv.s.x v7, a1 -; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v7, 6 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-NEXT: addi sp, s0, -1536 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll index c0d366760d0797..f3e823562888fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -417,8 +417,8 @@ declare <32 x i64> @llvm.vp.abs.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 @@ -432,8 +432,8 @@ define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t ; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 943fc58d637a00..068c25b8210020 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -847,27 +847,27 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli 
zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vor.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t +; RV32-NEXT: vsrl.vx v11, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t ; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vor.vv v11, v12, v11, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: vand.vx v12, v12, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 @@ -982,27 +982,27 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1119,27 +1119,27 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v14, v0.t +; RV32-NEXT: vsrl.vx v14, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; 
RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: vand.vx v16, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 @@ -1254,27 +1254,27 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1391,13 +1391,13 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t ; RV32-NEXT: vor.vv v16, v12, v16, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vx v20, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v20, 24, v0.t ; RV32-NEXT: vand.vv v24, v8, v12, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t @@ -1526,27 +1526,27 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; 
RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1685,20 +1685,23 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 @@ -1711,10 +1714,10 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1727,38 +1730,38 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), 
zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vi v16, v16, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t @@ -1885,60 +1888,60 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v0, v0, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, 
v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -2049,20 +2052,23 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 @@ -2075,10 +2081,10 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -2091,38 +2097,38 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv 
v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vi v16, v16, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2249,60 +2255,60 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v0, v0, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, 
v8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -2388,8 +2394,8 @@ define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 @@ -2427,13 +2433,13 @@ define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a4 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a4 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index f80d4e5c0d7c3f..1490738687322a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -295,27 +295,27 @@ define <2 x i64> 
@vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vor.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t +; RV32-NEXT: vsrl.vx v11, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t ; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vor.vv v11, v12, v11, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: vand.vx v12, v12, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -373,27 +373,27 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -453,27 +453,27 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: 
vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v14, v0.t +; RV32-NEXT: vsrl.vx v14, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: vand.vx v16, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -531,27 +531,27 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -611,13 +611,13 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t ; RV32-NEXT: vor.vv v16, v12, v16, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vx v20, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v20, 24, v0.t ; RV32-NEXT: vand.vv v24, v8, v12, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t @@ -689,27 +689,27 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; 
RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -779,20 +779,23 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -805,10 +808,10 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -913,13 +916,13 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v16, 
(a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1010,20 +1013,23 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -1036,10 +1042,10 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1144,13 +1150,13 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1228,8 +1234,8 @@ define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -1246,13 +1252,13 @@ 
define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index af7d7f7ae755bc..65a1035fd815c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -567,13 +567,14 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vmv.s.x v10, a3 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) -; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vslideup.vi v8, v10, 7 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 3e2af7e8267b93..befbfb88550bad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; 
ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi 
a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 2f4539d5038c2a..b42fb8c6861644 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1503,24 +1503,28 @@ declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1535,37 +1539,60 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, 
sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64: @@ -1655,33 +1682,29 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1743,24 +1766,28 @@ declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1775,37 +1802,60 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: 
vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v16i64: @@ -1895,33 +1945,29 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1991,7 +2037,7 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -2035,111 +2081,145 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: 
vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; 
RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2147,13 +2227,13 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; 
RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -2171,18 +2251,18 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2191,17 +2271,35 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -2211,7 +2309,7 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2219,21 +2317,21 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, 
(a0) # Unknown-size Folded Reload @@ -2257,8 +2355,8 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 @@ -2315,13 +2413,13 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v16, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t @@ -2364,10 +2462,14 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2391,74 +2493,8 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; 
RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 @@ -2467,41 +2503,84 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vx v0, v24, a2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; 
RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -4060,24 +4139,28 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4092,37 +4175,60 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: 
vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v15i64: @@ -4212,33 +4318,29 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 
2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -4298,24 +4400,28 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4330,37 +4436,60 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size 
Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v16i64: @@ -4450,33 +4579,29 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -4544,7 +4669,7 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 
0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -4588,111 +4713,145 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: 
addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4700,13 +4859,13 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: 
slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -4724,18 +4883,18 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4744,17 +4903,35 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4764,7 +4941,7 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4772,21 +4949,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, 
a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4810,8 +4987,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 @@ -4868,13 +5045,13 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v16, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t @@ -4917,10 +5094,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -4944,74 +5125,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; 
RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 @@ -5020,41 +5135,84 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 2 +; RV32-NEXT: vor.vv v24, v24, v0 +; 
RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vx v0, v24, a2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 0b6d8b33394d50..5fceab869ab85d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1119,55 +1119,93 @@ declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32) define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; 
RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v15i64: @@ -1228,34 +1266,29 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 
4(sp) ; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1303,55 +1336,93 @@ declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32) define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: 
addi a1, sp, 32 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v16i64: @@ -1412,34 +1483,29 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; 
RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1495,11 +1561,16 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 @@ -1524,74 +1595,93 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a2, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v8, v16, v0.t -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: csrr a2, vlenb 
-; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vadd.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vsub.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v24, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: addi a2, sp, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t ; RV32-NEXT: csrr a2, vlenb @@ -1603,14 +1693,13 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -1625,20 +1714,37 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; 
RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -1666,8 +1772,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 @@ -1710,13 +1816,13 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a6 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v16, v8, v16, v0.t @@ -1746,12 +1852,11 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1777,97 +1882,67 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { 
; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a2, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a2), zero -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v0, v8 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: addi a2, sp, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v8 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v16, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: vsub.vv v16, v8, v16 -; RV32-NEXT: vand.vv v8, v16, v0 +; RV32-NEXT: vand.vv v0, v16, v24 ; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: 
vadd.vv v16, v0, v16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 4 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v16, v0 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index f2926fa91e5c26..e7736e7f360f31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1263,59 +1263,86 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t 
-; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v15i64: @@ -1385,33 +1412,29 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, 
ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1463,59 +1486,86 @@ declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v16i64: @@ -1585,33 +1635,29 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1671,7 +1717,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1705,111 +1751,145 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, 
vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; 
RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1817,13 +1897,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t ; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t @@ -1831,18 
+1911,18 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1851,17 +1931,35 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -1871,7 +1969,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1879,21 +1977,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1917,8 +2015,8 @@ define <32 x i64> 
@vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a1, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB34_2 @@ -1965,13 +2063,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -2004,10 +2102,14 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2032,96 +2134,73 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: vsub.vx v24, v8, a2 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: 
vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 ; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v24, v0, a2 +; RV32-NEXT: vnot.v v0, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v24, a2 -; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv 
v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3420,59 +3499,86 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; 
RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: @@ -3542,33 +3648,29 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -3618,59 +3720,86 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi 
a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: @@ -3740,33 +3869,29 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; 
RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -3824,7 +3949,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -3858,111 +3983,145 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; 
RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; 
RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -3970,13 +4129,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t ; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t @@ -3984,18 +4143,18 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4004,17 +4163,35 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, 
a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4024,7 +4201,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4032,21 +4209,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4070,8 +4247,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a1, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB70_2 @@ -4118,13 +4295,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -4157,10 +4334,14 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; 
RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -4185,96 +4366,73 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: vsub.vx v24, v8, a2 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 ; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: 
vsub.vx v24, v0, a2 +; RV32-NEXT: vnot.v v0, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v24, a2 -; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 1587f770f87ca2..9f8de22b25c2df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -15,16 +15,16 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vadd.vi v12, 
v11, -16 ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index dccb62877af3cc..386c71cf665ced 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -326,9 +326,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: andi a1, a1, 255 ; RV32-NEXT: li a2, 128 +; RV32-NEXT: addi a3, a0, 128 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle8.v v16, (a2) +; RV32-NEXT: vle8.v v16, (a3) ; RV32-NEXT: vle8.v v24, (a0) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: add a1, a0, a1 @@ -357,9 +357,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: andi a1, a1, 255 ; RV64-NEXT: li a2, 128 +; RV64-NEXT: addi a3, a0, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: addi a2, a0, 128 -; RV64-NEXT: vle8.v v16, (a2) +; RV64-NEXT: vle8.v v16, (a3) ; RV64-NEXT: vle8.v v24, (a0) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: add a1, a0, a1 @@ -388,9 +388,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: andi sp, sp, -128 ; RV32ZBS-NEXT: andi a1, a1, 255 ; RV32ZBS-NEXT: li a2, 128 +; RV32ZBS-NEXT: addi a3, a0, 128 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV32ZBS-NEXT: addi a2, a0, 128 -; RV32ZBS-NEXT: vle8.v v16, (a2) +; RV32ZBS-NEXT: vle8.v v16, (a3) ; RV32ZBS-NEXT: vle8.v v24, (a0) ; RV32ZBS-NEXT: mv a0, sp ; RV32ZBS-NEXT: add a1, a0, a1 @@ -419,9 +419,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: andi sp, sp, -128 ; RV64ZBS-NEXT: andi a1, a1, 255 ; RV64ZBS-NEXT: li a2, 128 +; RV64ZBS-NEXT: addi a3, a0, 128 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64ZBS-NEXT: addi a2, a0, 128 -; RV64ZBS-NEXT: vle8.v v16, (a2) +; RV64ZBS-NEXT: vle8.v v16, (a3) ; RV64ZBS-NEXT: vle8.v v24, (a0) ; RV64ZBS-NEXT: mv a0, sp ; RV64ZBS-NEXT: add a1, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index e969da6fd45b71..1dd14709c839de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -721,9 +721,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32-NEXT: andi a1, a1, 63 ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: li a2, 32 +; RV32-NEXT: addi a3, a0, 128 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: vle32.v v16, (a0) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: add a1, a0, a1 @@ -749,9 +749,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64-NEXT: andi a1, a1, 63 ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: li a2, 32 +; RV64-NEXT: addi a3, a0, 128 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64-NEXT: addi a2, a0, 128 -; RV64-NEXT: vle32.v v8, (a2) +; RV64-NEXT: vle32.v v8, (a3) ; RV64-NEXT: vle32.v v16, (a0) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: add a1, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 287dd510674d18..c1b4c5fda6c640 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll 
@@ -204,8 +204,8 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) define 
<8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb 
+; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index edb33158e32eba..51eb63f5f92212 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -177,8 +177,8 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 ; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t @@ -253,8 +253,8 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 
x half> %vb, <16 x i1> ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t @@ -608,7 +608,6 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) @@ -618,28 +617,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb @@ -666,13 +665,13 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb @@ -759,9 +758,9 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 
48649c43f782ad..03e0ac42c442c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -177,8 +177,8 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 ; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t @@ -253,8 +253,8 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t @@ -608,7 +608,6 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) @@ -618,28 +617,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb @@ -666,13 +665,13 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, 
v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb @@ -759,9 +758,9 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 1b50214bbf164d..9e9a8b8a4b644e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x half> %r @@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x half> %r @@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %r @@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x half> %r @@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <32 x half> %r @@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> 
%v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x float> %r @@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x float> %r @@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x float> %r @@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x float> %r @@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r @@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r @@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> %r diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 9e83efd3519539..0047dc4fde643a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,9 +39,10 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vmul.vx v14, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vadd.vi v8, v14, -14 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vadd.vi v8, v14, -14 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v12 @@ -1407,8 +1408,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x float> poison, float %e0, i64 0 @@ -1458,8 +1459,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index ed152e64a91ef4..f3b124aa34dcb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -56,9 +56,9 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vmv.v.i v0, 10 +; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -261,13 +261,13 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 ; V128-NEXT: vl8r.v v8, (a1) # 
Unknown-size Folded Reload +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v16, v8 ; V128-NEXT: vwmaccu.vx v0, a0, v8 ; V128-NEXT: vmv8r.v v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index b0f6bebea0381d..9151d562a1ecda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -93,12 +93,11 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -109,13 +108,13 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vrsub.vi v12, v12, 4 +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vrsub.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -129,12 +128,12 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: li a0, 3 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI8_0) -; CHECK-NEXT: vlse64.v v10, (a1), zero -; CHECK-NEXT: vmul.vx v12, v12, a0 ; CHECK-NEXT: vmv.v.i v0, 3 +; CHECK-NEXT: vmul.vx v12, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 0f003d7af6100e..d25312268ada62 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1199,7 +1199,7 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; ZVFH-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a1) ; ZVFH-NEXT: vle16.v v9, (a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index a8e4af2d7368e8..bc46e7d264bc0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -351,25 +351,23 @@ define void 
@fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa3, v8 -; RV32-NEXT: feq.d a0, fa3, fa3 +; RV32-NEXT: feq.d a2, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 +; RV32-NEXT: fcvt.w.d a3, fa3, rtz +; RV32-NEXT: fld fa3, 40(sp) ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: fld fa2, 40(sp) -; RV32-NEXT: fcvt.w.d a2, fa3, rtz -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: feq.d a2, fa2, fa2 -; RV32-NEXT: fmax.d fa3, fa2, fa5 +; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: neg a0, a2 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: feq.d a2, fa3, fa3 +; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a3, fa3, rtz ; RV32-NEXT: fld fa3, 32(sp) -; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: neg a0, a2 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: feq.d a2, fa3, fa3 @@ -395,8 +393,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -452,25 +450,23 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa3, v8 -; RV64-NEXT: feq.d a0, fa3, fa3 +; RV64-NEXT: feq.d a2, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 +; RV64-NEXT: fcvt.l.d a3, fa3, rtz +; RV64-NEXT: fld fa3, 40(sp) ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: fld fa2, 40(sp) -; RV64-NEXT: fcvt.l.d a2, fa3, rtz -; RV64-NEXT: neg a0, a0 -; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: feq.d a2, fa2, fa2 -; RV64-NEXT: fmax.d fa3, fa2, fa5 +; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: neg a0, a2 +; RV64-NEXT: and a0, a0, a3 +; RV64-NEXT: feq.d a2, fa3, fa3 +; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a3, fa3, rtz ; RV64-NEXT: fld fa3, 32(sp) -; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: neg a0, a2 ; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: feq.d a2, fa3, fa3 @@ -496,8 +492,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 @@ -542,46 +538,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a2, fa4, rtz -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.x v10, a2 -; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV32-NEXT: vslidedown.vi 
v12, v8, 2 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vfmv.f.s fa4, v10 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: fcvt.wu.d a3, fa4, rtz ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa4, v8 -; RV32-NEXT: fmax.d fa4, fa4, fa3 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: fld fa2, 40(sp) -; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: fld fa4, 32(sp) -; RV32-NEXT: fmax.d fa2, fa2, fa3 -; RV32-NEXT: fmin.d fa2, fa2, fa5 -; RV32-NEXT: fcvt.wu.d a2, fa2, rtz ; RV32-NEXT: fmax.d fa4, fa4, fa3 -; RV32-NEXT: fld fa2, 48(sp) ; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fcvt.wu.d a3, fa4, rtz -; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: fcvt.wu.d a4, fa4, rtz +; RV32-NEXT: fmax.d fa4, fa2, fa3 +; RV32-NEXT: fld fa2, 32(sp) +; RV32-NEXT: fmin.d fa4, fa4, fa5 +; RV32-NEXT: fcvt.wu.d a5, fa4, rtz +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: fmax.d fa4, fa2, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 +; RV32-NEXT: fcvt.wu.d a2, fa4, rtz +; RV32-NEXT: fld fa4, 48(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a4 +; RV32-NEXT: fmax.d fa4, fa4, fa3 +; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz ; RV32-NEXT: fld fa4, 56(sp) -; RV32-NEXT: vmv.v.x v9, a3 -; RV32-NEXT: vslide1down.vx v9, v9, a2 +; RV32-NEXT: vmv.v.x v9, a2 +; RV32-NEXT: vslide1down.vx v9, v9, a5 ; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -618,46 +611,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a2, fa4, rtz -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.x v10, a2 -; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 2 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vfmv.f.s fa4, v10 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: fcvt.lu.d a3, fa4, rtz ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa4, v8 -; RV64-NEXT: fmax.d fa4, fa4, fa3 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: fld fa2, 40(sp) -; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: fld fa4, 32(sp) -; RV64-NEXT: fmax.d fa2, fa2, fa3 -; RV64-NEXT: fmin.d fa2, fa2, fa5 -; RV64-NEXT: fcvt.lu.d a2, fa2, rtz ; RV64-NEXT: fmax.d fa4, fa4, fa3 -; RV64-NEXT: fld fa2, 48(sp) ; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fcvt.lu.d a3, fa4, rtz -; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: fcvt.lu.d a4, fa4, rtz ; RV64-NEXT: fmax.d fa4, fa2, fa3 +; RV64-NEXT: fld fa2, 32(sp) +; RV64-NEXT: fmin.d fa4, fa4, fa5 
+; RV64-NEXT: fcvt.lu.d a5, fa4, rtz +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vmv.v.x v8, a2 +; RV64-NEXT: fmax.d fa4, fa2, fa3 +; RV64-NEXT: fmin.d fa4, fa4, fa5 +; RV64-NEXT: fcvt.lu.d a2, fa4, rtz +; RV64-NEXT: fld fa4, 48(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a3 +; RV64-NEXT: vslide1down.vx v8, v8, a4 +; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz ; RV64-NEXT: fld fa4, 56(sp) -; RV64-NEXT: vmv.v.x v9, a3 -; RV64-NEXT: vslide1down.vx v9, v9, a2 +; RV64-NEXT: vmv.v.x v9, a2 +; RV64-NEXT: vslide1down.vx v9, v9, a5 ; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index 48cc3f17a6269f..f195eeadf02746 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -96,8 +96,8 @@ declare <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float>, <32 x i1>, i32) define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 @@ -112,8 +112,8 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index 49a1b19b58a270..a4050b716e7877 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -408,8 +408,8 @@ define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index 
d44efa2f6133f0..b652cdd88c7c2c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -408,8 +408,8 @@ define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index d890bf5412f9fa..920eed322363a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -98,8 +98,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 @@ -112,8 +112,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 6ffa6ac250ed7f..9c76b83d0974af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -132,12 +132,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ZVFH32-LABEL: si2fp_v3i7_v3f32: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: lw a1, 4(a0) -; ZVFH32-NEXT: lw a2, 0(a0) -; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a1, 0(a0) +; ZVFH32-NEXT: lw a2, 4(a0) ; ZVFH32-NEXT: lw a0, 8(a0) -; ZVFH32-NEXT: vmv.v.x v8, a2 -; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: vmv.v.x v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH32-NEXT: vadd.vv v8, v8, v8 @@ -149,12 +149,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFH64-LABEL: si2fp_v3i7_v3f32: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: ld a1, 8(a0) -; ZVFH64-NEXT: ld a2, 0(a0) -; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a1, 0(a0) +; ZVFH64-NEXT: ld a2, 8(a0) ; ZVFH64-NEXT: ld a0, 16(a0) -; ZVFH64-NEXT: vmv.v.x v8, a2 -; 
ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: vmv.v.x v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH64-NEXT: vadd.vv v8, v8, v8 @@ -166,12 +166,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: lw a1, 4(a0) -; ZVFHMIN32-NEXT: lw a2, 0(a0) -; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a1, 0(a0) +; ZVFHMIN32-NEXT: lw a2, 4(a0) ; ZVFHMIN32-NEXT: lw a0, 8(a0) -; ZVFHMIN32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vmv.v.x v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8 @@ -183,12 +183,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: ld a1, 8(a0) -; ZVFHMIN64-NEXT: ld a2, 0(a0) -; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a1, 0(a0) +; ZVFHMIN64-NEXT: ld a2, 8(a0) ; ZVFHMIN64-NEXT: ld a0, 16(a0) -; ZVFHMIN64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vmv.v.x v8, a1 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8 @@ -205,12 +205,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ZVFH32-LABEL: ui2fp_v3i7_v3f32: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: lw a1, 4(a0) -; ZVFH32-NEXT: lw a2, 0(a0) -; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a1, 0(a0) +; ZVFH32-NEXT: lw a2, 4(a0) ; ZVFH32-NEXT: lw a0, 8(a0) -; ZVFH32-NEXT: vmv.v.x v8, a2 -; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: vmv.v.x v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH32-NEXT: li a0, 127 @@ -222,12 +222,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFH64-LABEL: ui2fp_v3i7_v3f32: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: ld a1, 8(a0) -; ZVFH64-NEXT: ld a2, 0(a0) -; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a1, 0(a0) +; ZVFH64-NEXT: ld a2, 8(a0) ; ZVFH64-NEXT: ld a0, 16(a0) -; ZVFH64-NEXT: vmv.v.x v8, a2 -; ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: vmv.v.x v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH64-NEXT: li a0, 127 @@ -239,12 +239,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN32-LABEL: ui2fp_v3i7_v3f32: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: lw a1, 4(a0) -; ZVFHMIN32-NEXT: lw a2, 0(a0) -; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a1, 0(a0) +; ZVFHMIN32-NEXT: lw a2, 4(a0) ; ZVFHMIN32-NEXT: lw a0, 8(a0) -; ZVFHMIN32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vmv.v.x v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 ; 
ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN32-NEXT: li a0, 127 @@ -256,12 +256,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN64-LABEL: ui2fp_v3i7_v3f32: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: ld a1, 8(a0) -; ZVFHMIN64-NEXT: ld a2, 0(a0) -; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a1, 0(a0) +; ZVFHMIN64-NEXT: ld a2, 8(a0) ; ZVFHMIN64-NEXT: ld a0, 16(a0) -; ZVFHMIN64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vmv.v.x v8, a1 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN64-NEXT: li a0, 127 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 53de1a87553553..e81f686a283032 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -277,14 +277,14 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_2: ; VLA: # %bb.0: -; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v8, (a1) ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v10, (a0) +; VLA-NEXT: vle32.v v8, (a0) +; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; VLA-NEXT: vle32.v v10, (a1) ; VLA-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; VLA-NEXT: vslideup.vi v10, v8, 2 +; VLA-NEXT: vslideup.vi v8, v10, 2 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vse32.v v10, (a0) +; VLA-NEXT: vse32.v v8, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_2: @@ -306,12 +306,13 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_6: ; VLA: # %bb.0: +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vle32.v v8, (a0) ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v8, (a1) +; VLA-NEXT: vle32.v v10, (a1) ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v10, (a0) -; VLA-NEXT: vslideup.vi v10, v8, 6 -; VLA-NEXT: vse32.v v10, (a0) +; VLA-NEXT: vslideup.vi v8, v10, 6 +; VLA-NEXT: vse32.v v8, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_6: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 4954827876c19a..776a1e9bab6b26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -533,11 +533,11 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: vle64.v v12, (a1) +; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vle64.v v12, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index ed6c01aaf7fe1c..ebd88c92d0d0ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -669,13 +669,14 @@ 
define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 3 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: li a1, 146 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret @@ -1183,42 +1184,42 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_contigous: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 2(a0) -; CHECK-NEXT: lbu a3, 3(a0) -; CHECK-NEXT: lbu a4, 4(a0) -; CHECK-NEXT: lbu a5, 5(a0) -; CHECK-NEXT: lbu a6, 6(a0) -; CHECK-NEXT: lbu a7, 7(a0) -; CHECK-NEXT: lbu t0, 9(a0) -; CHECK-NEXT: lbu t1, 10(a0) -; CHECK-NEXT: lbu t2, 11(a0) -; CHECK-NEXT: lbu t3, 12(a0) -; CHECK-NEXT: lbu t4, 13(a0) -; CHECK-NEXT: lbu t5, 14(a0) -; CHECK-NEXT: lbu t6, 15(a0) +; CHECK-NEXT: addi a1, a0, 8 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 2(a0) +; CHECK-NEXT: lbu a4, 3(a0) +; CHECK-NEXT: lbu a5, 4(a0) +; CHECK-NEXT: lbu a6, 5(a0) +; CHECK-NEXT: lbu a7, 6(a0) +; CHECK-NEXT: lbu t0, 7(a0) +; CHECK-NEXT: lbu t1, 9(a0) +; CHECK-NEXT: lbu t2, 10(a0) +; CHECK-NEXT: lbu t3, 11(a0) +; CHECK-NEXT: lbu t4, 12(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: lbu t5, 13(a0) +; CHECK-NEXT: lbu t6, 14(a0) +; CHECK-NEXT: lbu a0, 15(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vlse8.v v9, (a1), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v10, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v9, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v10, v8, t0 +; CHECK-NEXT: vslide1down.vx v8, v9, t1 ; CHECK-NEXT: vslide1down.vx v8, v8, t2 ; CHECK-NEXT: vslide1down.vx v8, v8, t3 ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1277,42 +1278,42 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_gather: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 22(a0) -; CHECK-NEXT: lbu a3, 31(a0) -; CHECK-NEXT: lbu a4, 44(a0) -; CHECK-NEXT: lbu a5, 55(a0) -; CHECK-NEXT: lbu a6, 623(a0) -; CHECK-NEXT: lbu a7, 75(a0) -; CHECK-NEXT: lbu t0, 93(a0) -; CHECK-NEXT: lbu t1, 105(a0) -; CHECK-NEXT: lbu t2, 161(a0) -; CHECK-NEXT: lbu t3, 124(a0) -; 
CHECK-NEXT: lbu t4, 163(a0) -; CHECK-NEXT: lbu t5, 144(a0) -; CHECK-NEXT: lbu t6, 154(a0) +; CHECK-NEXT: addi a1, a0, 82 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 22(a0) +; CHECK-NEXT: lbu a4, 31(a0) +; CHECK-NEXT: lbu a5, 44(a0) +; CHECK-NEXT: lbu a6, 55(a0) +; CHECK-NEXT: lbu a7, 623(a0) +; CHECK-NEXT: lbu t0, 75(a0) +; CHECK-NEXT: lbu t1, 93(a0) +; CHECK-NEXT: lbu t2, 105(a0) +; CHECK-NEXT: lbu t3, 161(a0) +; CHECK-NEXT: lbu t4, 124(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: lbu t5, 163(a0) +; CHECK-NEXT: lbu t6, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vlse8.v v9, (a1), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v10, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v9, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v10, v8, t0 +; CHECK-NEXT: vslide1down.vx v8, v9, t1 ; CHECK-NEXT: vslide1down.vx v8, v8, t2 ; CHECK-NEXT: vslide1down.vx v8, v8, t3 ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1375,17 +1376,17 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; CHECK-NEXT: lbu a3, 105(a0) ; CHECK-NEXT: lbu a4, 161(a0) ; CHECK-NEXT: lbu a5, 124(a0) -; CHECK-NEXT: lbu a6, 163(a0) -; CHECK-NEXT: lbu a7, 144(a0) -; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), zero +; CHECK-NEXT: lbu a1, 163(a0) +; CHECK-NEXT: lbu a6, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: ret %p9 = getelementptr i8, ptr %p, i32 82 @@ -1424,18 +1425,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; CHECK-NEXT: lbu a2, 22(a0) ; CHECK-NEXT: lbu a3, 31(a0) ; CHECK-NEXT: lbu a4, 44(a0) -; CHECK-NEXT: lbu a5, 55(a0) -; CHECK-NEXT: lbu a6, 623(a0) -; CHECK-NEXT: lbu a7, 75(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero +; CHECK-NEXT: lbu a5, 55(a0) +; CHECK-NEXT: lbu a6, 623(a0) +; CHECK-NEXT: lbu a0, 75(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslide1down.vx v8, v8, a5 ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1470,29 +1471,29 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; CHECK-LABEL: 
buildvec_v16i8_undef_edges: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 31 -; CHECK-NEXT: lbu a2, 44(a0) -; CHECK-NEXT: lbu a3, 55(a0) -; CHECK-NEXT: lbu a4, 623(a0) -; CHECK-NEXT: lbu a5, 75(a0) -; CHECK-NEXT: lbu a6, 93(a0) -; CHECK-NEXT: lbu a7, 105(a0) -; CHECK-NEXT: lbu t0, 161(a0) +; CHECK-NEXT: addi a2, a0, 82 +; CHECK-NEXT: lbu a3, 44(a0) +; CHECK-NEXT: lbu a4, 55(a0) +; CHECK-NEXT: lbu a5, 623(a0) +; CHECK-NEXT: lbu a6, 75(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a2 -; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: lbu a1, 93(a0) +; CHECK-NEXT: lbu a7, 105(a0) +; CHECK-NEXT: lbu a0, 161(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vlse8.v v9, (a2), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v10, v8, a5 -; CHECK-NEXT: vslide1down.vx v8, v9, a6 +; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v10, v8, a6 +; CHECK-NEXT: vslide1down.vx v8, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v8, t0 -; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 @@ -1530,34 +1531,34 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 44(a0) -; CHECK-NEXT: lbu a3, 55(a0) -; CHECK-NEXT: lbu a4, 75(a0) -; CHECK-NEXT: lbu a5, 93(a0) -; CHECK-NEXT: lbu a6, 124(a0) -; CHECK-NEXT: lbu a7, 144(a0) -; CHECK-NEXT: lbu t0, 154(a0) +; CHECK-NEXT: addi a1, a0, 82 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 44(a0) +; CHECK-NEXT: lbu a4, 55(a0) +; CHECK-NEXT: lbu a5, 75(a0) +; CHECK-NEXT: lbu a6, 93(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: lbu a7, 124(a0) +; CHECK-NEXT: lbu t0, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 -; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vlse8.v v9, (a1), zero +; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vslide1down.vx v10, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v9, a5 +; CHECK-NEXT: vslide1down.vx v10, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v9, a6 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vslide1down.vx v8, v8, t0 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll index 4509642fdef17c..e0c676788dcccf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll @@ -828,112 +828,104 @@ define i64 @explode_8xi64(<8 x i64> %v) { define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-LABEL: explode_16xi64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: .cfi_def_cfa_offset 64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: .cfi_offset s6, -32 -; RV32-NEXT: .cfi_offset s7, -36 -; RV32-NEXT: .cfi_offset s8, -40 -; RV32-NEXT: .cfi_offset s9, -44 -; RV32-NEXT: .cfi_offset s10, -48 -; RV32-NEXT: .cfi_offset s11, -52 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 0(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 +; RV32-NEXT: .cfi_offset s4, -20 +; RV32-NEXT: .cfi_offset s5, -24 +; RV32-NEXT: .cfi_offset s6, -28 +; RV32-NEXT: .cfi_offset s7, -32 +; RV32-NEXT: .cfi_offset s8, -36 +; RV32-NEXT: .cfi_offset s9, -40 +; RV32-NEXT: .cfi_offset s10, -44 +; RV32-NEXT: .cfi_offset s11, -48 ; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s a0, v24 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sw a1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: vslidedown.vi v16, v8, 3 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s a1, v24 -; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.x.s a3, v24 ; RV32-NEXT: vmv.x.s a4, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s a5, v24 ; RV32-NEXT: vmv.x.s a6, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 5 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: 
vmv.x.s a7, v24 ; RV32-NEXT: vmv.x.s t0, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 6 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t1, v24 ; RV32-NEXT: vmv.x.s t2, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 7 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t3, v24 ; RV32-NEXT: vmv.x.s t4, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 8 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t5, v24 ; RV32-NEXT: vmv.x.s t6, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 9 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s0, v24 ; RV32-NEXT: vmv.x.s s1, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 10 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s2, v24 ; RV32-NEXT: vmv.x.s s3, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 11 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s4, v24 ; RV32-NEXT: vmv.x.s s5, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 12 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s6, v24 ; RV32-NEXT: vmv.x.s s7, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 13 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s s8, v24 -; RV32-NEXT: vmv.x.s s9, v16 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.x.s s9, v24 +; RV32-NEXT: vmv.x.s s8, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 14 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s s10, v24 -; RV32-NEXT: vmv.x.s s11, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 15 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s ra, v24 -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.s.x v17, zero ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vredxor.vs v8, v8, v9 +; RV32-NEXT: vredxor.vs v17, v8, v17 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 15 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a3 -; RV32-NEXT: vmv.x.s a3, v9 -; RV32-NEXT: add a3, a3, a0 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: sltu a1, a0, a1 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a4, a0, a4 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: vsrl.vx v18, v17, a0 +; RV32-NEXT: vmv.x.s s10, v18 +; RV32-NEXT: vmv.x.s s11, v17 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v0, v8, a0 +; RV32-NEXT: add a1, s10, a1 +; RV32-NEXT: add a2, s11, a2 +; RV32-NEXT: sltu a0, a2, s11 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a4, a2, a4 +; RV32-NEXT: sltu a1, a4, a2 +; RV32-NEXT: add a1, a1, a5 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a6, a4, a6 ; RV32-NEXT: sltu a1, a6, a4 ; RV32-NEXT: add a1, a1, a7 @@ -968,33 +960,36 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add s7, s5, s7 ; RV32-NEXT: sltu a1, s7, s5 -; RV32-NEXT: add a1, a1, s8 +; RV32-NEXT: add a1, a1, s9 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s9, s7, s9 -; RV32-NEXT: sltu a1, s9, s7 -; RV32-NEXT: add a1, a1, s10 +; RV32-NEXT: vmv.x.s a1, v24 +; RV32-NEXT: add s8, s7, s8 +; RV32-NEXT: sltu a2, s8, s7 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: vmv.x.s a2, v16 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s11, s9, s11 -; 
RV32-NEXT: sltu a1, s11, s9 -; RV32-NEXT: add a1, a1, ra +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: add a2, s8, a2 +; RV32-NEXT: sltu a3, a2, s8 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, a0, a1 -; RV32-NEXT: add a0, s11, a2 -; RV32-NEXT: sltu a2, a0, s11 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: sltu a2, a0, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 0(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: explode_16xi64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 40ff8b50d99d8d..2ea90203b21030 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -69,9 +69,9 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vmv.v.i v0, 10 +; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; V128-NEXT: vid.v v8 ; V128-NEXT: vsrl.vi v8, v8, 1 -; V128-NEXT: vadd.vi v8, v8, 1 ; V128-NEXT: vmv.v.i v0, 10 +; V128-NEXT: vadd.vi v8, v8, 1 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu ; V512-NEXT: vid.v v8 ; V512-NEXT: vsrl.vi v8, v8, 1 -; 
V512-NEXT: vadd.vi v8, v8, 1 ; V512-NEXT: vmv.v.i v0, 10 +; V512-NEXT: vadd.vi v8, v8, 1 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret @@ -426,13 +426,13 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 ; V128-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v16, v8 ; V128-NEXT: vwmaccu.vx v0, a0, v8 ; V128-NEXT: vmv8r.v v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 58af6ac246d161..9ec6f314e55fd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 8 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -162,22 +162,21 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 2 -; RV32-NEXT: lui a0, %hi(.LCPI11_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v20, (a0) ; RV32-NEXT: li a0, 5 +; RV32-NEXT: lui a1, %hi(.LCPI11_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV32-NEXT: vle16.v v20, (a1) ; RV32-NEXT: vslide1down.vx v21, v16, a0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 ; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv4r.v v16, v8 ; RV64-NEXT: lui a0, 327683 ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: addi a0, a0, 1 @@ -186,7 +185,7 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v20, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v16, v20 +; RV64-NEXT: vrgatherei16.vv v16, v8, v20 ; RV64-NEXT: li a0, 164 ; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: lui a0, 163841 @@ -194,9 +193,10 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV64-NEXT: addi a0, a0, 1 ; RV64-NEXT: slli a0, a0, 17 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; RV64-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> ret <8 x i64> %s @@ -210,13 +210,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; 
RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vle16.v v17, (a0) ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 +; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -367,10 +367,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -386,10 +386,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: li a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -402,10 +402,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: li a0, 67 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -421,10 +421,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: addi a0, a0, 514 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -440,10 +440,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -463,10 +463,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 98 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, 
v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -666,8 +666,8 @@ define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: li a0, 195 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -680,10 +680,10 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: li a0, 234 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -694,13 +694,13 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 635869904832c5..79c36a629465d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1100,46 +1100,46 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-LABEL: mulhu_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: lui a1, 3 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: li a1, -128 -; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: addi a2, a1, 32 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI65_0) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) ; CHECK-NEXT: vle8.v v11, (a2) -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v11 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: li a2, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vxm v12, v10, a2, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vsrl.vv v8, v9, v8 +; CHECK-NEXT: vmulhu.vv v8, v8, v11 +; CHECK-NEXT: vsub.vv v9, v9, v8 +; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vadd.vv v9, v9, v8 ; CHECK-NEXT: li a2, 513 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: 
vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 4 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: addi a1, a1, 78 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v9, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -1158,16 +1158,16 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: lui a1, %hi(.LCPI66_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 1 +; CHECK-NEXT: vle16.v v11, (a1) +; CHECK-NEXT: vmv.v.i v12, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v9, v12, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: lui a1, %hi(.LCPI66_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) -; CHECK-NEXT: vle16.v v12, (a1) ; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vmulhu.vv v9, v9, v11 ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 @@ -1176,7 +1176,7 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v9, v12, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) @@ -1222,18 +1222,18 @@ define void @mulhu_v4i32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vmulhu.vv v8, v8, v11 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: lui a1, 4128 ; CHECK-NEXT: addi a1, a1, 514 @@ -1455,13 +1455,13 @@ define void @mulhs_v2i64(ptr %x) { ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a2, a1, 32 -; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: lui a2, %hi(.LCPI74_0) ; RV64-NEXT: ld a2, %lo(.LCPI74_0)(a2) -; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; 
RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v9, a2 @@ -3260,49 +3260,47 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: lui a1, 163907 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: li a1, -128 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 ; CHECK-NEXT: lui a1, 66049 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI181_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI181_0) ; CHECK-NEXT: vle8.v v14, (a1) -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsrl.vv v10, v8, v10 -; CHECK-NEXT: vmulhu.vv v10, v10, v14 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: vmulhu.vv v8, v8, v12 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v16, v12, a1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vsrl.vv v8, v10, v8 +; CHECK-NEXT: vmulhu.vv v8, v8, v14 +; CHECK-NEXT: vsub.vv v10, v10, v8 +; CHECK-NEXT: vmulhu.vv v10, v10, v16 +; CHECK-NEXT: vadd.vv v10, v10, v8 ; CHECK-NEXT: lui a1, 8208 ; CHECK-NEXT: addi a1, a1, 513 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: lui a1, 66785 ; CHECK-NEXT: addi a1, a1, 78 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 ; CHECK-NEXT: lui a1, 529160 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vmerge.vim v12, v12, 3, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v10, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x @@ -3326,12 +3324,12 @@ define void @mulhu_v16i16(ptr %x) { ; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: lui a1, %hi(.LCPI182_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; RV32-NEXT: vle16.v v14, (a1) +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsext.vf2 v16, v9 ; RV32-NEXT: vsrl.vv v16, v10, v16 ; RV32-NEXT: vmulhu.vv v14, v16, v14 @@ -3361,27 +3359,27 @@ define void @mulhu_v16i16(ptr %x) { ; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: lui a1, 1048568 ; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: lui a1, %hi(.LCPI182_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) +; RV64-NEXT: vle16.v v12, (a1) ; RV64-NEXT: li a1, 1 ; 
RV64-NEXT: slli a1, a1, 48 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv.v.x v14, a1 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI182_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; RV64-NEXT: vle16.v v14, (a1) -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsrl.vv v12, v8, v16 -; RV64-NEXT: vmulhu.vv v12, v12, v14 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vmulhu.vv v8, v8, v10 -; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: vsext.vf2 v16, v14 +; RV64-NEXT: vsrl.vv v14, v8, v16 +; RV64-NEXT: vmulhu.vv v12, v14, v12 ; RV64-NEXT: lui a1, %hi(.LCPI182_1) ; RV64-NEXT: addi a1, a1, %lo(.LCPI182_1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vlse64.v v10, (a1), zero +; RV64-NEXT: vlse64.v v14, (a1), zero ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsrl.vv v8, v8, v12 +; RV64-NEXT: vsub.vv v8, v8, v12 +; RV64-NEXT: vmulhu.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: vsext.vf2 v10, v14 +; RV64-NEXT: vsrl.vv v8, v8, v10 ; RV64-NEXT: vse16.v v8, (a0) ; RV64-NEXT: ret %a = load <16 x i16>, ptr %x @@ -3433,23 +3431,24 @@ define void @mulhu_v4i64(ptr %x) { ; RV32-NEXT: vle32.v v10, (a1) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vmulhu.vv v10, v8, v10 -; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: vmv.s.x v12, a1 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV32-NEXT: vslideup.vi v14, v12, 5 +; RV32-NEXT: lui a1, %hi(.LCPI184_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vle8.v v12, (a1) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: vmulhu.vv v8, v8, v14 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a1, %hi(.LCPI184_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle8.v v10, (a1) -; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vsext.vf4 v10, v12 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vsrl.vv v8, v8, v12 +; RV32-NEXT: vsrl.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -3457,19 +3456,19 @@ define void @mulhu_v4i64(ptr %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: lui a1, %hi(.LCPI184_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: li a1, -1 ; RV64-NEXT: slli a1, a1, 63 -; RV64-NEXT: vmv.s.x v10, a1 -; RV64-NEXT: vmv.v.i v12, 0 +; RV64-NEXT: vmv.s.x v12, a1 +; RV64-NEXT: vmv.v.i v14, 0 ; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v10, 2 -; RV64-NEXT: lui a1, %hi(.LCPI184_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV64-NEXT: vslideup.vi v14, v12, 2 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: vmulhu.vv v10, v8, v10 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vmulhu.vv v8, v8, v12 +; RV64-NEXT: vmulhu.vv v8, v8, v14 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: lui a1, 12320 ; RV64-NEXT: addi a1, a1, 513 @@ -3488,14 +3487,13 @@ define void @mulhs_v32i8(ptr %x) { ; CHECK-LABEL: mulhs_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v10, 7 ; CHECK-NEXT: lui 
a1, 304453 ; CHECK-NEXT: addi a1, a1, -1452 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 7 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: li a1, -123 ; CHECK-NEXT: vmv.v.x v12, a1 @@ -3615,19 +3613,19 @@ define void @mulhs_v4i64(ptr %x) { ; ; RV64-LABEL: mulhs_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: addiw a1, a1, 1365 ; RV64-NEXT: slli a2, a1, 32 ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: vmv.v.x v10, a1 -; RV64-NEXT: lui a1, %hi(.LCPI188_0) -; RV64-NEXT: ld a1, %lo(.LCPI188_0)(a1) +; RV64-NEXT: lui a2, %hi(.LCPI188_0) +; RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 5 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV64-NEXT: vmulh.vv v10, v8, v10 ; RV64-NEXT: lui a1, 1044496 ; RV64-NEXT: addi a1, a1, -256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index 8acc70faaa1fc9..82e0760d593c26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -21,8 +21,8 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: vmv.s.x v8, a4 ; ZVE32X-NEXT: vand.vi v8, v8, 1 ; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: vmv.s.x v8, zero -; ZVE32X-NEXT: vmerge.vim v9, v8, 1, v0 +; ZVE32X-NEXT: vmv.s.x v9, zero +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 ; ZVE32X-NEXT: xor a0, a0, a7 ; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmv.s.x v10, a0 @@ -32,32 +32,38 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: vmv.v.i v10, 0 ; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v11, v9, 1 +; ZVE32X-NEXT: vslideup.vi v11, v8, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 ; ZVE32X-NEXT: xor a0, a6, a3 ; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v11, a0 +; ZVE32X-NEXT: vmv.s.x v8, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v11, v11, 1 -; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 +; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vmsne.vi v8, v8, 0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 +; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v9, v11, 2 +; ZVE32X-NEXT: vslideup.vi v11, v8, 2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v9, 0 -; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: xor a1, a2, a1 ; ZVE32X-NEXT: snez a0, a1 -; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: vmv.s.x v8, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v10, v10, 1 -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vmsne.vi v8, v8, 0 ; 
ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vslideup.vi v9, v8, 3 -; ZVE32X-NEXT: vmsne.vi v0, v9, 0 +; ZVE32X-NEXT: vmerge.vim v10, v10, 1, v0 +; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vslideup.vi v10, v8, 3 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: ret ; ; ZVE64X-LABEL: load_large_vector: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 99364264de8293..178a920169ad96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -159,16 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 54 +; RV32-NEXT: li a3, 82 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 21 +; RV32-NEXT: li a4, 57 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 @@ -177,30 +177,27 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 3 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 41 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v1, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 37 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a5, a4, 6 +; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 4 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 45 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -209,391 +206,429 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 13 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a5, a4, 5 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, %hi(.LCPI6_1) +; RV32-NEXT: addi a4, a4, 
%lo(.LCPI6_1) +; RV32-NEXT: lui a5, 1 +; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a6, 25 +; RV32-NEXT: mul a4, a4, a6 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 45 +; RV32-NEXT: li a4, 73 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_1) -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: addi a4, a4, -64 -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a5, a1, 2 -; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: li a3, 49 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v16, (a3) +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a5, -64 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v2, a4 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vrgatherei16.vv v16, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t -; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v12, v8 +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv4r.v v16, v8 -; RV32-NEXT: vslideup.vi v8, v16, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v12, v8, 2 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 21 +; 
RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t +; RV32-NEXT: vmv.v.v v20, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: lui a3, %hi(.LCPI6_3) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) +; RV32-NEXT: lui a4, %hi(.LCPI6_4) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vle16.v v16, (a3) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_4) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v2, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_3) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_3) -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vrgatherei16.vv v24, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v8, v16, v0.t +; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v20, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v24, v2 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: slli a3, a1, 6 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t -; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v16, v8, 6, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli 
a3, a1, 2 +; RV32-NEXT: slli a3, a1, 5 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_4) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v24, v8 -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) +; RV32-NEXT: lui a3, %hi(.LCPI6_6) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v24, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6) -; RV32-NEXT: li a3, 960 -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v16, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: li a1, 960 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 13 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vrgatherei16.vv v8, v24, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v12, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_8) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8) +; RV32-NEXT: lui a4, %hi(.LCPI6_9) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_9) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v24, (a3) +; RV32-NEXT: vle16.v v28, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv 
v4, v16, v8 -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v4, v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t -; RV32-NEXT: lui a1, %hi(.LCPI6_8) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_9) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9) -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 13 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v8, 6 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v24, a1 -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv.s.x v3, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v16, 6 +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v28, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 57 +; RV32-NEXT: mul a1, a1, a3 +; 
RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) +; RV32-NEXT: lui a3, %hi(.LCPI6_12) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_12) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12) -; RV32-NEXT: li a3, 1008 -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vmv.s.x v25, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vle16.v v12, (a3) +; RV32-NEXT: li a1, 1008 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: vmv1r.v v0, v25 +; RV32-NEXT: vrgatherei16.vv v24, v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v28, v8 +; RV32-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_14) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14) +; RV32-NEXT: lui a4, %hi(.LCPI6_15) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_15) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v24, (a3) ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 41 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: slli a3, a1, 6 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # 
Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t -; RV32-NEXT: lui a1, %hi(.LCPI6_14) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14) +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_15) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15) -; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 49 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 29 +; RV32-NEXT: li a2, 21 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v24, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 57 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.v.v v28, v0 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v24, (a1) +; RV32-NEXT: vse32.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v28, (a1) ; RV32-NEXT: addi a1, a0, 192 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; 
RV32-NEXT: slli a3, a2, 2 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 4 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 54 +; RV32-NEXT: li a1, 82 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 @@ -604,372 +639,422 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 74 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xca, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 74 * vlenb ; RV64-NEXT: addi a2, a1, 256 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: li a3, 25 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 6 +; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v8, v16, 4 +; RV64-NEXT: vrgather.vi v12, v16, 4 ; RV64-NEXT: li a1, 128 -; RV64-NEXT: vmv.s.x v4, a1 +; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: li a3, 49 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vmv1r.v v0, v4 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t -; RV64-NEXT: vmv.v.v v20, v8 +; RV64-NEXT: vrgather.vi v12, v16, 2, v0.t ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vid.v v10 ; RV64-NEXT: li a1, 6 -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v6, v8, a1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v24, v6 +; RV64-NEXT: vmul.vx v2, v10, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vle64.v v16, (a2) +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 57 +; RV64-NEXT: 
mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v7, a1 +; RV64-NEXT: vadd.vi v10, v2, -16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vmv.s.x v5, a1 -; RV64-NEXT: vadd.vi v16, v6, -16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: vrgatherei16.vv v16, v24, v2 +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV64-NEXT: vrgatherei16.vv v16, v24, v10, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v8 +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v24, v16, 5 -; RV64-NEXT: vmv1r.v v0, v4 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v12, v16, 5 +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmv1r.v v6, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t +; RV64-NEXT: vrgather.vi v12, v16, 3, v0.t +; RV64-NEXT: vmv.v.v v28, v12 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v6, 1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v24, v2, 1 +; RV64-NEXT: vadd.vi v26, v2, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v6, -15 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: vrgatherei16.vv v16, v8, v24 +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v8, v26, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma 
-; RV64-NEXT: vmv.v.v v24, v8 +; RV64-NEXT: vmv.v.v v28, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addi a1, a1, 7 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 6 +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vmv2r.v v26, v6 -; RV64-NEXT: vadd.vi v24, v6, 2 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vrgatherei16.vv v12, v16, v9 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v0, v24 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgatherei16.vv v12, v16, v10 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: vrgather.vi v12, v16, 2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v12, v16, 3 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 24 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.s.x v1, a1 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v24, v2, 2 +; RV64-NEXT: vadd.vi v4, v2, -14 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v24, v26, -14 -; RV64-NEXT: vmv2r.v v6, v26 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vrgatherei16.vv v8, v16, v24, v0.t -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v8, v16, v24 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 57 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v20, v24, v12 +; RV64-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, 
v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v24, 4, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vmv2r.v v10, v6 +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v20, v16, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 6 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v6, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v6, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v4, v2, 3 +; RV64-NEXT: vadd.vi v8, v2, -13 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v0, v8 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v10, -13 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v8, v16, v4 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v8, v28, v0.t -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addi a1, a1, 7 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vrgatherei16.vv v20, v0, v12 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li 
a2, 20 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: lui a1, 96 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: li a1, 192 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v28, v8, 2 -; RV64-NEXT: vrgatherei16.vv v28, v24, v12, v0.t -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 96 +; RV64-NEXT: li a2, 192 +; RV64-NEXT: vmv.s.x v28, a2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vmv1r.v v0, v28 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 6 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v24, 4 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v12, v24, v8, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v0, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 28 ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v26, v24, -12 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v30, v2, 4 +; RV64-NEXT: vadd.vi v6, v2, -12 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v16, v8, v30 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v28, v8 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v8, v6, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 112 ; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv1r.v v0, v28 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli 
zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v16, v24 +; RV64-NEXT: vmv2r.v v8, v2 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v24, 5 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v12, v2, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v0, v12 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v24, v0, v12 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v24, -11 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vadd.vi v2, v8, -11 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v24, v8, v2, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vmv.v.v v12, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v20, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: 
vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v20, (a1) ; RV64-NEXT: addi a1, a0, 192 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 20 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 12 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 56 +; RV64-NEXT: li a1, 74 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index d55683e653d246..2ba1a751f9ed5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -57,17 +57,17 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -118,50 +118,50 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # 
Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -224,50 +224,50 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: 
vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -328,57 +328,57 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 68(sp) ; RV32-NEXT: sw a0, 64(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 124(sp) ; RV32-NEXT: sw a0, 120(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 116(sp) ; RV32-NEXT: sw a0, 112(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 108(sp) ; RV32-NEXT: sw a0, 104(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 100(sp) ; RV32-NEXT: sw a0, 96(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 92(sp) ; RV32-NEXT: sw a0, 88(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 84(sp) ; RV32-NEXT: sw a0, 80(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf @@ -502,64 +502,64 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 196(sp) ; RV32-NEXT: sw a0, 192(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s 
fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: sw a0, 144(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: sw a0, 184(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: sw a0, 176(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: sw a0, 168(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf @@ -711,17 +711,17 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -772,50 +772,50 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -890,32 +890,32 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: sw a0, 160(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 35baa6808db603..c377851d0c174a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -801,30 +801,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV32-NEXT: fcvt.w.d a0, fa5 ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.d a1, fa5 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v8, 2 -; RV32-NEXT: vfmv.f.s fa5, v12 -; RV32-NEXT: fcvt.w.d a0, fa5 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vfmv.f.s fa5, v10 +; RV32-NEXT: fcvt.w.d 
a2, fa5 ; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa5, v8 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: fld fa4, 32(sp) +; RV32-NEXT: fld fa5, 32(sp) +; RV32-NEXT: vfmv.f.s fa4, v8 ; RV32-NEXT: fld fa3, 40(sp) -; RV32-NEXT: fcvt.w.d a0, fa5 +; RV32-NEXT: fcvt.w.d a3, fa4 +; RV32-NEXT: fcvt.w.d a4, fa5 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: fcvt.w.d a1, fa3 ; RV32-NEXT: fld fa5, 48(sp) -; RV32-NEXT: fcvt.w.d a1, fa4 -; RV32-NEXT: fcvt.w.d a2, fa3 -; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: fcvt.w.d a0, fa5 ; RV32-NEXT: fld fa5, 56(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a4 ; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: fcvt.w.d a0, fa5 ; RV32-NEXT: vslide1down.vx v8, v8, a0 @@ -854,30 +851,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV64-i32-NEXT: fcvt.l.d a0, fa5 ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: fcvt.l.d a1, fa5 -; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-i32-NEXT: vmv.v.x v10, a1 -; RV64-i32-NEXT: vslide1down.vx v10, v10, a0 -; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-i32-NEXT: vslidedown.vi v12, v8, 2 -; RV64-i32-NEXT: vfmv.f.s fa5, v12 -; RV64-i32-NEXT: fcvt.l.d a0, fa5 -; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-i32-NEXT: vslide1down.vx v10, v10, a0 ; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-i32-NEXT: vslidedown.vi v10, v8, 2 +; RV64-i32-NEXT: vfmv.f.s fa5, v10 +; RV64-i32-NEXT: fcvt.l.d a2, fa5 ; RV64-i32-NEXT: vslidedown.vi v8, v8, 3 -; RV64-i32-NEXT: vfmv.f.s fa5, v8 -; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-i32-NEXT: fld fa4, 32(sp) +; RV64-i32-NEXT: fld fa5, 32(sp) +; RV64-i32-NEXT: vfmv.f.s fa4, v8 ; RV64-i32-NEXT: fld fa3, 40(sp) -; RV64-i32-NEXT: fcvt.l.d a0, fa5 +; RV64-i32-NEXT: fcvt.l.d a3, fa4 +; RV64-i32-NEXT: fcvt.l.d a4, fa5 +; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-i32-NEXT: vmv.v.x v8, a1 +; RV64-i32-NEXT: fcvt.l.d a1, fa3 ; RV64-i32-NEXT: fld fa5, 48(sp) -; RV64-i32-NEXT: fcvt.l.d a1, fa4 -; RV64-i32-NEXT: fcvt.l.d a2, fa3 -; RV64-i32-NEXT: vslide1down.vx v8, v10, a0 +; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 +; RV64-i32-NEXT: vslide1down.vx v8, v8, a2 +; RV64-i32-NEXT: vslide1down.vx v8, v8, a3 ; RV64-i32-NEXT: fcvt.l.d a0, fa5 ; RV64-i32-NEXT: fld fa5, 56(sp) +; RV64-i32-NEXT: vslide1down.vx v8, v8, a4 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a1 -; RV64-i32-NEXT: vslide1down.vx v8, v8, a2 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 ; RV64-i32-NEXT: fcvt.l.d a0, fa5 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index 023d707f07bff7..36dd8c07d11cdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -140,8 +140,9 @@ define <4 x i1> @buildvec_mask_v4i1() { define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 3 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: 
vand.vi v8, v8, 1 @@ -150,8 +151,9 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32F-NEXT: vmv.v.i v0, 3 +; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32F-NEXT: vmv.v.x v8, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 @@ -245,8 +247,8 @@ define <8 x i1> @buildvec_mask_v8i1() { define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 @@ -256,8 +258,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; ZVE32F-NEXT: li a2, 19 +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; ZVE32F-NEXT: vmv.s.x v0, a2 ; ZVE32F-NEXT: vmv.v.x v8, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 @@ -286,8 +288,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -303,8 +305,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -331,8 +333,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -348,8 +350,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -375,8 +377,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -391,8 +393,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; ZVE32F-NEXT: 
vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 7fc442c88d101b..979785dd2c0243 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -24,11 +24,11 @@ define void @splat_zeros_v2i1(ptr %x) { define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: andi a1, a1, 1 -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index f42f32e246585e..d791fc2d1709ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -519,17 +519,17 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lbu a1, 0(a1) -; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: lbu a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse8.v v8, (a2), zero +; RV64ZVE32F-NEXT: lbu a2, 0(a3) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru) ret <4 x i8> %v @@ -711,8 +711,8 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: .LBB12_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -734,8 +734,8 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB12_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1208,17 +1208,17 @@ define <4 x i16> 
@mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lh a1, 0(a1) -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: lh a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 0(a3) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru) ret <4 x i16> %v @@ -1405,8 +1405,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1430,8 +1430,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1556,8 +1556,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1581,8 +1581,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1708,8 +1708,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1734,8 +1734,8 @@ define <8 x i16> 
@mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1863,8 +1863,8 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB26_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1887,8 +1887,8 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB26_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2257,17 +2257,17 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lw a1, 0(a1) -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero +; RV64ZVE32F-NEXT: lw a2, 0(a3) +; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru) ret <4 x i32> %v @@ -2453,8 +2453,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2478,8 +2478,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2603,8 +2603,8 @@ define <8 x i32> 
@mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2628,8 +2628,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2757,8 +2757,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2783,8 +2783,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2915,8 +2915,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2940,8 +2940,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3066,8 +3066,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3091,8 +3091,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < 
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3221,8 +3221,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3247,8 +3247,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3376,8 +3376,8 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB41_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3400,8 +3400,8 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -4090,13 +4090,13 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB48_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB48_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4253,8 +4253,8 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB48_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, 
ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4364,13 +4364,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB49_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB49_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4527,8 +4527,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB49_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4640,13 +4640,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB50_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB50_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4805,8 +4805,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB50_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB50_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4923,13 +4923,13 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5087,8 +5087,8 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr 
%base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB51_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB51_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5198,13 +5198,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB52_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB52_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5362,8 +5362,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB52_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB52_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5475,13 +5475,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB53_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5643,8 +5643,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB53_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a7, a6, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a7, .LBB53_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5759,13 +5759,13 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB54_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, 
ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB54_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5923,8 +5923,8 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB54_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB54_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6032,13 +6032,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB55_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB55_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -6196,8 +6196,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB55_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB55_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6306,13 +6306,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB56_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB56_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -6472,8 +6472,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB56_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB56_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6589,27 +6589,27 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: lw a4, 56(a2) ; RV32ZVE32F-NEXT: lw a5, 48(a2) ; RV32ZVE32F-NEXT: lw a6, 40(a2) -; RV32ZVE32F-NEXT: lw a7, 32(a2) -; RV32ZVE32F-NEXT: lw t0, 24(a2) -; 
RV32ZVE32F-NEXT: lw t1, 16(a2) -; RV32ZVE32F-NEXT: lw t2, 8(a2) +; RV32ZVE32F-NEXT: lw a7, 8(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: lw t0, 16(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw a2, 32(a2) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB57_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) @@ -7017,14 +7017,14 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; RV64ZVE32F-LABEL: mgather_truemask_v4f16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: flh fa4, 0(a2) -; RV64ZVE32F-NEXT: flh fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: flh fa4, 0(a3) +; RV64ZVE32F-NEXT: flh fa3, 0(a0) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -7214,8 +7214,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7239,8 +7239,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7365,8 +7365,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli 
zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7390,8 +7390,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7517,8 +7517,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7543,8 +7543,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7672,8 +7672,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: .LBB67_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7696,8 +7696,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7940,14 +7940,14 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr ; RV64ZVE32F-LABEL: mgather_truemask_v4f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a1) -; RV64ZVE32F-NEXT: flw fa4, 0(a2) -; RV64ZVE32F-NEXT: flw fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero +; RV64ZVE32F-NEXT: flw fa4, 0(a3) +; RV64ZVE32F-NEXT: flw fa3, 0(a0) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -8136,8 +8136,8 @@ define <8 x float> 
@mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8161,8 +8161,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8286,8 +8286,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8311,8 +8311,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8440,8 +8440,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8466,8 +8466,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8598,8 +8598,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8623,8 +8623,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; 
RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8749,8 +8749,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8774,8 +8774,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8904,8 +8904,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8930,8 +8930,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -9059,8 +9059,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -9083,8 +9083,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB80_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -9664,31 +9664,32 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB87_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 ; RV32ZVE32F-NEXT: .LBB87_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 ; RV32ZVE32F-NEXT: .LBB87_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 ; RV32ZVE32F-NEXT: .LBB87_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 ; RV32ZVE32F-NEXT: .LBB87_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_16 ; RV32ZVE32F-NEXT: .LBB87_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_9 ; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9706,52 +9707,51 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_5 +; 
RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_7 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_8 ; RV32ZVE32F-NEXT: j .LBB87_9 ; @@ -9779,8 +9779,8 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -9879,31 +9879,32 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB88_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 ; RV32ZVE32F-NEXT: .LBB88_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 ; RV32ZVE32F-NEXT: .LBB88_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 ; RV32ZVE32F-NEXT: .LBB88_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 ; RV32ZVE32F-NEXT: .LBB88_6: # %else14 -; 
RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_16 ; RV32ZVE32F-NEXT: .LBB88_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_9 ; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9921,52 +9922,51 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_8 ; RV32ZVE32F-NEXT: j .LBB88_9 ; @@ -9994,8 +9994,8 @@ define 
<8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10096,31 +10096,32 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB89_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 ; RV32ZVE32F-NEXT: .LBB89_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 ; RV32ZVE32F-NEXT: .LBB89_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 ; RV32ZVE32F-NEXT: .LBB89_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 ; RV32ZVE32F-NEXT: .LBB89_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_16 ; RV32ZVE32F-NEXT: .LBB89_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_9 ; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10138,52 +10139,51 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_8 ; RV32ZVE32F-NEXT: j .LBB89_9 ; @@ -10213,8 +10213,8 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10320,31 +10320,32 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB90_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, 
.LBB90_12 ; RV32ZVE32F-NEXT: .LBB90_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 ; RV32ZVE32F-NEXT: .LBB90_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 ; RV32ZVE32F-NEXT: .LBB90_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 ; RV32ZVE32F-NEXT: .LBB90_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_16 ; RV32ZVE32F-NEXT: .LBB90_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_9 ; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10362,52 +10363,51 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz 
a2, .LBB90_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_8 ; RV32ZVE32F-NEXT: j .LBB90_9 ; @@ -10436,8 +10436,8 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB90_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10536,31 +10536,32 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB91_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_11 ; RV32ZVE32F-NEXT: .LBB91_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_12 ; RV32ZVE32F-NEXT: .LBB91_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_13 ; RV32ZVE32F-NEXT: .LBB91_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_14 ; RV32ZVE32F-NEXT: .LBB91_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_15 ; RV32ZVE32F-NEXT: .LBB91_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_16 ; RV32ZVE32F-NEXT: .LBB91_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB91_9 ; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10578,52 +10579,51 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; 
RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_2 ; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_3 ; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_4 ; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_5 ; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_6 ; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_7 ; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB91_8 ; RV32ZVE32F-NEXT: j .LBB91_9 ; @@ -10652,8 +10652,8 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10754,31 +10754,32 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; 
RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB92_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_11 ; RV32ZVE32F-NEXT: .LBB92_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_12 ; RV32ZVE32F-NEXT: .LBB92_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_13 ; RV32ZVE32F-NEXT: .LBB92_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_14 ; RV32ZVE32F-NEXT: .LBB92_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_15 ; RV32ZVE32F-NEXT: .LBB92_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_16 ; RV32ZVE32F-NEXT: .LBB92_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB92_9 ; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10796,52 +10797,51 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_2 ; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_3 ; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_4 ; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_5 ; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10 ; RV32ZVE32F-NEXT: 
vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_6 ; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_7 ; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB92_8 ; RV32ZVE32F-NEXT: j .LBB92_9 ; @@ -10874,8 +10874,8 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a4, a3, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10979,31 +10979,32 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB93_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_11 ; RV32ZVE32F-NEXT: .LBB93_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_12 ; RV32ZVE32F-NEXT: .LBB93_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_13 ; RV32ZVE32F-NEXT: .LBB93_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_14 ; RV32ZVE32F-NEXT: .LBB93_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_15 ; RV32ZVE32F-NEXT: .LBB93_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_16 ; RV32ZVE32F-NEXT: .LBB93_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 
+; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB93_9 ; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11021,52 +11022,51 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_2 ; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_3 ; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_4 ; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_5 ; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_6 ; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_7 ; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB93_8 ; RV32ZVE32F-NEXT: j .LBB93_9 ; @@ -11095,8 +11095,8 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB93_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11193,31 +11193,32 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB94_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_11 ; RV32ZVE32F-NEXT: .LBB94_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_12 ; RV32ZVE32F-NEXT: .LBB94_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_13 ; RV32ZVE32F-NEXT: .LBB94_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_14 ; RV32ZVE32F-NEXT: .LBB94_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_15 ; RV32ZVE32F-NEXT: .LBB94_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_16 ; RV32ZVE32F-NEXT: .LBB94_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB94_9 ; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11235,52 +11236,51 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_2 ; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_3 ; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 
+; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_4 ; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_5 ; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_6 ; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_7 ; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB94_8 ; RV32ZVE32F-NEXT: j .LBB94_9 ; @@ -11309,8 +11309,8 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB94_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11408,31 +11408,32 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB95_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_11 ; RV32ZVE32F-NEXT: .LBB95_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_12 ; RV32ZVE32F-NEXT: .LBB95_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_13 ; RV32ZVE32F-NEXT: .LBB95_4: # %else8 
-; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_14 ; RV32ZVE32F-NEXT: .LBB95_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_15 ; RV32ZVE32F-NEXT: .LBB95_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_16 ; RV32ZVE32F-NEXT: .LBB95_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB95_9 ; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11450,52 +11451,51 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_2 ; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_3 ; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_4 ; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_5 +; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_5 ; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_6 ; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_7 ; RV32ZVE32F-NEXT: 
.LBB95_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB95_8 ; RV32ZVE32F-NEXT: j .LBB95_9 ; @@ -11526,8 +11526,8 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB95_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11632,45 +11632,46 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: lw a3, 56(a2) ; RV32ZVE32F-NEXT: lw a4, 48(a2) ; RV32ZVE32F-NEXT: lw a5, 40(a2) -; RV32ZVE32F-NEXT: lw a6, 32(a2) -; RV32ZVE32F-NEXT: lw a7, 24(a2) -; RV32ZVE32F-NEXT: lw t0, 16(a2) -; RV32ZVE32F-NEXT: lw t1, 8(a2) +; RV32ZVE32F-NEXT: lw a6, 8(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw t0, 24(a2) +; RV32ZVE32F-NEXT: lw a2, 32(a2) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB96_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_11 ; RV32ZVE32F-NEXT: .LBB96_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_12 ; RV32ZVE32F-NEXT: .LBB96_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_13 ; RV32ZVE32F-NEXT: .LBB96_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_14 ; RV32ZVE32F-NEXT: .LBB96_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_15 ; RV32ZVE32F-NEXT: .LBB96_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_16 ; RV32ZVE32F-NEXT: .LBB96_7: # %else17 -; 
RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB96_9 ; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11688,52 +11689,51 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_2 ; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_3 ; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_4 ; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_5 ; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_6 ; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_7 ; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB96_8 ; RV32ZVE32F-NEXT: j .LBB96_9 ; @@ -11882,8 +11882,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: 
vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11900,8 +11900,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4 ; RV64ZVE32F-NEXT: .LBB97_8: # %else11 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 @@ -11914,8 +11914,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 ; RV64ZVE32F-NEXT: .LBB97_10: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 @@ -11939,8 +11939,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB97_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else29 @@ -11962,8 +11962,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13 ; RV64ZVE32F-NEXT: .LBB97_20: # %else38 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40 @@ -12092,22 +12092,22 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vmv1r.v v12, v10 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v10, 16 +; RV64V-NEXT: vslidedown.vi v12, v10, 16 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v8, v8, 16 -; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 -; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64V-NEXT: vslideup.vi v12, v10, 16 -; RV64V-NEXT: vmv.v.v v8, v12 +; RV64V-NEXT: 
vslideup.vi v10, v12, 16 +; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: @@ -12139,8 +12139,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -12157,8 +12157,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 @@ -12171,8 +12171,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 @@ -12196,8 +12196,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 @@ -12220,8 +12220,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 @@ -12244,8 +12244,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else41 @@ -12269,8 +12269,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_28: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 45 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: 
vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else53 @@ -12287,8 +12287,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 ; RV64ZVE32F-NEXT: .LBB98_32: # %else59 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 42 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61 @@ -12301,8 +12301,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 ; RV64ZVE32F-NEXT: .LBB98_34: # %else62 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 41 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else65 @@ -12326,8 +12326,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_39: # %else74 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 37 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else77 @@ -12349,8 +12349,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 ; RV64ZVE32F-NEXT: .LBB98_44: # %else86 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 33 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88 @@ -12627,29 +12627,40 @@ define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) { ; This looks like a strided load (at i8), but isn't at index type. 
define <4 x i32> @mgather_narrow_edge_case(ptr %base) { -; RV32-LABEL: mgather_narrow_edge_case: -; RV32: # %bb.0: -; RV32-NEXT: li a1, -512 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: vmv.v.x v8, a1 -; RV32-NEXT: vmerge.vim v8, v8, 0, v0 -; RV32-NEXT: vluxei32.v v8, (a0), v8 -; RV32-NEXT: ret +; RV32V-LABEL: mgather_narrow_edge_case: +; RV32V: # %bb.0: +; RV32V-NEXT: li a1, -512 +; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32V-NEXT: vmv.v.i v0, 5 +; RV32V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32V-NEXT: vmv.v.x v8, a1 +; RV32V-NEXT: vmerge.vim v8, v8, 0, v0 +; RV32V-NEXT: vluxei32.v v8, (a0), v8 +; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_narrow_edge_case: ; RV64V: # %bb.0: ; RV64V-NEXT: li a1, -512 -; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64V-NEXT: vmv.v.i v0, 5 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64V-NEXT: vluxei64.v v8, (a0), v10 ; RV64V-NEXT: ret ; +; RV32ZVE32F-LABEL: mgather_narrow_edge_case: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: li a1, -512 +; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV32ZVE32F-NEXT: vmv.v.i v0, 5 +; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.v.x v8, a1 +; RV32ZVE32F-NEXT: vmerge.vim v8, v8, 0, v0 +; RV32ZVE32F-NEXT: vluxei32.v v8, (a0), v8 +; RV32ZVE32F-NEXT: ret +; ; RV64ZVE32F-LABEL: mgather_narrow_edge_case: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: addi a1, a0, -512 @@ -12728,8 +12739,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v8, v8, a7 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; @@ -12803,8 +12814,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-NEXT: vslide1down.vx v8, v8, a6 -; RV64V-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-NEXT: vmv.v.i v0, 15 +; RV64V-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload @@ -12854,8 +12865,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12881,23 +12892,23 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: lh a3, 10(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 24(a0) -; RV64ZVE32F-NEXT: lh a6, 26(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 8(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; 
RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 24(a0) +; RV64ZVE32F-NEXT: lh a0, 26(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12925,24 +12936,24 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 4 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: lh a3, 12(a0) -; RV64ZVE32F-NEXT: lh a4, 14(a0) -; RV64ZVE32F-NEXT: lh a5, 22(a0) -; RV64ZVE32F-NEXT: lh a6, 28(a0) -; RV64ZVE32F-NEXT: lh a7, 30(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 20 +; RV64ZVE32F-NEXT: addi a2, a0, 4 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 12(a0) +; RV64ZVE32F-NEXT: lh a5, 14(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 20 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 28(a0) +; RV64ZVE32F-NEXT: lh a0, 30(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -12970,24 +12981,24 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 28 -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 24(a0) -; RV64ZVE32F-NEXT: lh a4, 26(a0) -; RV64ZVE32F-NEXT: lh a5, 22(a0) -; RV64ZVE32F-NEXT: lh a6, 16(a0) -; RV64ZVE32F-NEXT: lh a7, 18(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 20 +; RV64ZVE32F-NEXT: addi a2, a0, 28 +; RV64ZVE32F-NEXT: lh a3, 30(a0) +; RV64ZVE32F-NEXT: lh a4, 24(a0) +; RV64ZVE32F-NEXT: lh a5, 26(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 20 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 16(a0) +; RV64ZVE32F-NEXT: lh a0, 18(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: 
vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13015,24 +13026,24 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 28 -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 20(a0) -; RV64ZVE32F-NEXT: lh a4, 22(a0) -; RV64ZVE32F-NEXT: lh a5, 14(a0) -; RV64ZVE32F-NEXT: lh a6, 4(a0) -; RV64ZVE32F-NEXT: lh a7, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 12 +; RV64ZVE32F-NEXT: addi a2, a0, 28 +; RV64ZVE32F-NEXT: lh a3, 30(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 12 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13059,23 +13070,23 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: lh a3, 18(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 16(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13105,23 +13116,23 @@ define <8 x i16> 
@mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: lh a3, 20(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13152,23 +13163,23 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 2 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 18(a0) -; RV64ZVE32F-NEXT: lh a4, 20(a0) -; RV64ZVE32F-NEXT: lh a5, 10(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: addi a2, a0, 2 +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 10(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13202,23 +13213,23 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 20(a0) -; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 6(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), 
zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 20(a0) +; RV64ZVE32F-NEXT: lh a0, 22(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13249,23 +13260,23 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 20(a0) -; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 6(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 20(a0) +; RV64ZVE32F-NEXT: lh a0, 22(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13305,23 +13316,23 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 10(a0) -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: lh a3, 14(a0) -; RV64ZVE32F-NEXT: lh a4, 2(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 10(a0) +; RV64ZVE32F-NEXT: lh a3, 12(a0) +; RV64ZVE32F-NEXT: lh a4, 14(a0) +; RV64ZVE32F-NEXT: lh a5, 2(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret 
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13352,23 +13363,23 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 4(a0) -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: lh a3, 2(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 12(a0) -; RV64ZVE32F-NEXT: lh a6, 14(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 4(a0) +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 12(a0) +; RV64ZVE32F-NEXT: lh a0, 14(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13853,12 +13864,12 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: addi a1, a0, 136 ; RV64ZVE32F-NEXT: lw a2, 140(a0) -; RV64ZVE32F-NEXT: lw a3, 0(a0) -; RV64ZVE32F-NEXT: lw a0, 4(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero +; RV64ZVE32F-NEXT: lw a1, 0(a0) +; RV64ZVE32F-NEXT: lw a0, 4(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr i32, ptr %base, <4 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index 4bbda2152a6f97..ad075e4b4e198c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -401,14 +401,14 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi a3, a1, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: vle64.v v0, (a1) +; RV32-NEXT: vle64.v v24, (a3) ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v0, v24 -; RV32-NEXT: vmseq.vv v0, v16, v24 +; RV32-NEXT: vmseq.vv v8, v0, v16 +; RV32-NEXT: vmseq.vv v0, v24, v16 ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: vle64.v v16, (a1), v0.t ; RV32-NEXT: vmv1r.v v0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index aa815e18ac1014..52b2ffbc21c01a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -335,18 +335,18 @@ define void @mscatter_truemask_v4i8(<4 x i8> 
%val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vse8.v v8, (a2) +; RV64ZVE32F-NEXT: vse8.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse8.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse8.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse8.v v8, (a1) +; RV64ZVE32F-NEXT: vse8.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1)) ret void @@ -504,8 +504,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB9_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -526,8 +526,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB9_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -689,11 +689,11 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 @@ -747,13 +747,14 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a1 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 ; RV64ZVE32F-NEXT: andi a1, a0, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 @@ -852,18 +853,18 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: -; 
RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -1025,8 +1026,8 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1048,8 +1049,8 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB18_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1158,8 +1159,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1181,8 +1182,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB19_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1292,8 +1293,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1316,8 +1317,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: 
.LBB20_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1430,8 +1431,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB21_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1453,8 +1454,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB21_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1625,11 +1626,12 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.v.x v8, a0 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 -; RV64ZVE32F-NEXT: andi a1, a0, 1 -; RV64ZVE32F-NEXT: bnez a1, .LBB24_3 +; RV64ZVE32F-NEXT: andi a4, a0, 1 +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: bnez a4, .LBB24_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 @@ -1727,18 +1729,18 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -1903,8 +1905,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1927,8 +1929,8 @@ 
define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB29_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2039,8 +2041,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2063,8 +2065,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB30_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2179,8 +2181,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2204,8 +2206,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB31_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2323,8 +2325,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB32_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2347,8 +2349,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB32_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2460,8 +2462,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> 
%val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB33_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2484,8 +2486,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB33_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2601,8 +2603,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB34_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2626,8 +2628,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: .LBB34_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2742,8 +2744,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2766,8 +2768,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB35_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -3074,17 +3076,17 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV32ZVE32F-NEXT: lw a3, 20(a0) ; RV32ZVE32F-NEXT: lw a4, 16(a0) ; RV32ZVE32F-NEXT: lw a5, 12(a0) -; RV32ZVE32F-NEXT: lw a6, 8(a0) -; RV32ZVE32F-NEXT: lw a7, 0(a0) -; RV32ZVE32F-NEXT: lw a0, 4(a0) +; RV32ZVE32F-NEXT: lw a6, 0(a0) +; RV32ZVE32F-NEXT: lw a7, 4(a0) +; RV32ZVE32F-NEXT: lw a0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v8 -; RV32ZVE32F-NEXT: sw a7, 0(t0) -; RV32ZVE32F-NEXT: sw a0, 4(t0) +; RV32ZVE32F-NEXT: sw a6, 0(t0) +; RV32ZVE32F-NEXT: sw a7, 4(t0) ; RV32ZVE32F-NEXT: 
vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a6, 0(a0) -; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: vmv.x.s a6, v9 +; RV32ZVE32F-NEXT: sw a0, 0(a6) +; RV32ZVE32F-NEXT: sw a5, 4(a6) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 ; RV32ZVE32F-NEXT: sw a4, 0(a0) @@ -3383,42 +3385,43 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB42_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB42_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11 ; RV32ZVE32F-NEXT: .LBB42_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV32ZVE32F-NEXT: .LBB42_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_13 ; RV32ZVE32F-NEXT: .LBB42_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_14 ; RV32ZVE32F-NEXT: .LBB42_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_15 ; RV32ZVE32F-NEXT: .LBB42_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_16 ; RV32ZVE32F-NEXT: .LBB42_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_9 ; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3433,45 +3436,44 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2 ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 
4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3 ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4 ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5 ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6 ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3479,7 +3481,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7 ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3487,7 +3489,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8 ; RV32ZVE32F-NEXT: j .LBB42_9 ; @@ -3523,8 +3525,8 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB42_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -3627,42 +3629,43 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw 
s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB43_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB43_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11 ; RV32ZVE32F-NEXT: .LBB43_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV32ZVE32F-NEXT: .LBB43_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_13 ; RV32ZVE32F-NEXT: .LBB43_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_14 ; RV32ZVE32F-NEXT: .LBB43_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_15 ; RV32ZVE32F-NEXT: .LBB43_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_16 ; RV32ZVE32F-NEXT: .LBB43_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_9 ; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3677,45 +3680,44 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2 ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3 ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_4 ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5 ; RV32ZVE32F-NEXT: 
.LBB43_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6 ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3723,7 +3725,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7 ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3731,7 +3733,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8 ; RV32ZVE32F-NEXT: j .LBB43_9 ; @@ -3767,8 +3769,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -3873,42 +3875,43 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB44_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB44_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11 ; RV32ZVE32F-NEXT: .LBB44_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV32ZVE32F-NEXT: .LBB44_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13 ; RV32ZVE32F-NEXT: .LBB44_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; 
RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14 ; RV32ZVE32F-NEXT: .LBB44_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15 ; RV32ZVE32F-NEXT: .LBB44_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16 ; RV32ZVE32F-NEXT: .LBB44_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3923,45 +3926,44 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3969,7 +3971,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3977,7 +3979,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: 
vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8 ; RV32ZVE32F-NEXT: j .LBB44_9 ; @@ -4015,8 +4017,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4126,42 +4128,43 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB45_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB45_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4176,45 +4179,44 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw 
s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4222,7 +4224,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4230,7 +4232,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 ; RV32ZVE32F-NEXT: j .LBB45_9 ; @@ -4267,8 +4269,8 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB45_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4371,42 +4373,43 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; 
RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB46_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB46_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4421,45 +4424,44 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5 ; 
RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4467,7 +4469,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4475,7 +4477,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8 ; RV32ZVE32F-NEXT: j .LBB46_9 ; @@ -4512,8 +4514,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB46_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4618,42 +4620,43 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB47_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB47_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez 
a0, .LBB47_11 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4668,45 +4671,44 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4714,7 +4716,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; 
RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4722,7 +4724,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV32ZVE32F-NEXT: j .LBB47_9 ; @@ -4763,8 +4765,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4872,42 +4874,43 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB48_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB48_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4922,53 +4925,52 @@ 
define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4976,7 +4978,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8 ; RV32ZVE32F-NEXT: j .LBB48_9 ; @@ -5013,8 +5015,8 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: .LBB48_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV64ZVE32F-NEXT: 
# %bb.5: # %else4 @@ -5115,42 +5117,43 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB49_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB49_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5165,53 +5168,52 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 
1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5219,7 +5221,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8 ; RV32ZVE32F-NEXT: j .LBB49_9 ; @@ -5256,8 +5258,8 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB49_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -5359,42 +5361,43 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: 
andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB50_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB50_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5409,53 +5412,52 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; 
RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5463,7 +5465,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8 ; RV32ZVE32F-NEXT: j .LBB50_9 ; @@ -5502,8 +5504,8 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB50_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -5598,17 +5600,16 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -48 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48 -; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: addi sp, sp, -32 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 32 +; RV32ZVE32F-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 24(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s2, 20(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s5, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s6, 4(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s7, 0(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 @@ -5617,7 +5618,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: .cfi_offset s5, -24 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32 -; RV32ZVE32F-NEXT: .cfi_offset s8, -36 ; RV32ZVE32F-NEXT: lw a3, 60(a0) ; RV32ZVE32F-NEXT: lw a4, 56(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) @@ -5635,45 +5635,46 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: lw s2, 56(a2) ; RV32ZVE32F-NEXT: lw s3, 48(a2) ; RV32ZVE32F-NEXT: lw s4, 40(a2) -; RV32ZVE32F-NEXT: lw s5, 32(a2) -; RV32ZVE32F-NEXT: lw s6, 24(a2) -; RV32ZVE32F-NEXT: lw s7, 16(a2) -; RV32ZVE32F-NEXT: lw s8, 8(a2) +; RV32ZVE32F-NEXT: lw s5, 
8(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6 +; RV32ZVE32F-NEXT: lw s6, 16(a2) +; RV32ZVE32F-NEXT: lw s7, 24(a2) +; RV32ZVE32F-NEXT: lw a2, 32(a2) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB51_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi s2, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB51_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a2, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_12 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a2, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_13 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a2, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_14 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a2, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_15 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a2, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_16 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a2, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5682,25 +5683,23 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB51_9: # %else14 -; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: addi sp, sp, 48 +; RV32ZVE32F-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s2, 20(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s4, 12(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s5, 8(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s6, 4(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s7, 0(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: addi sp, sp, 32 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store -; 
RV32ZVE32F-NEXT: lw a2, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a2, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5708,7 +5707,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a2, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5716,7 +5715,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t6, 0(a0) ; RV32ZVE32F-NEXT: sw t5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a2, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5724,7 +5723,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t4, 0(a0) ; RV32ZVE32F-NEXT: sw t3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a2, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5732,7 +5731,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t2, 0(a0) ; RV32ZVE32F-NEXT: sw t1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a2, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5740,7 +5739,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a2, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5748,7 +5747,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a2, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8 ; RV32ZVE32F-NEXT: j .LBB51_9 ; @@ -6022,18 +6021,18 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, 
v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -6195,8 +6194,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: .LBB58_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6218,8 +6217,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB58_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6328,8 +6327,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB59_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6351,8 +6350,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB59_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6462,8 +6461,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB60_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6486,8 +6485,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB60_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6600,8 +6599,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi 
v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6623,8 +6622,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB61_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6844,18 +6843,18 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -7020,8 +7019,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: .LBB68_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7044,8 +7043,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB68_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7156,8 +7155,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB69_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7180,8 +7179,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB69_9: # %else10 -; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7296,8 +7295,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB70_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7321,8 +7320,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB70_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7440,8 +7439,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: .LBB71_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7464,8 +7463,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB71_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7577,8 +7576,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB72_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7601,8 +7600,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB72_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7718,8 +7717,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB73_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, 
ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7743,8 +7742,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: .LBB73_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7859,8 +7858,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7883,8 +7882,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB74_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -8394,81 +8393,81 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB81_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_10 ; RV32ZVE32F-NEXT: .LBB81_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_11 ; RV32ZVE32F-NEXT: .LBB81_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_12 ; RV32ZVE32F-NEXT: .LBB81_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_13 ; RV32ZVE32F-NEXT: .LBB81_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_14 ; RV32ZVE32F-NEXT: .LBB81_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_15 ; RV32ZVE32F-NEXT: .LBB81_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; 
RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16 ; RV32ZVE32F-NEXT: .LBB81_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_2 ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_3 ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_4 ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_5 ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_6 ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_7 ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8 ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8501,8 +8500,8 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: .LBB81_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -8594,81 +8593,81 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_9 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_10 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB82_9 +; RV32ZVE32F-NEXT: # %bb.1: # %else +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_10 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_11 ; RV32ZVE32F-NEXT: .LBB82_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_12 ; RV32ZVE32F-NEXT: .LBB82_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_13 ; RV32ZVE32F-NEXT: .LBB82_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_14 ; RV32ZVE32F-NEXT: .LBB82_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_15 ; RV32ZVE32F-NEXT: .LBB82_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16 ; RV32ZVE32F-NEXT: .LBB82_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_2 ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_3 ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_4 ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; 
RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_5 ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_6 ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_7 ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8 ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8701,8 +8700,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB82_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -8796,81 +8795,81 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB83_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_10 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_11 ; RV32ZVE32F-NEXT: .LBB83_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_12 ; RV32ZVE32F-NEXT: .LBB83_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_13 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8 -; RV32ZVE32F-NEXT: andi 
a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_14 ; RV32ZVE32F-NEXT: .LBB83_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_15 ; RV32ZVE32F-NEXT: .LBB83_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16 ; RV32ZVE32F-NEXT: .LBB83_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_2 ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_3 ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_4 ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_5 ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_6 ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_7 ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8 ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, 
e32, m2, ta, ma @@ -8905,8 +8904,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB83_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9005,81 +9004,81 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB84_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_10 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_11 ; RV32ZVE32F-NEXT: .LBB84_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_12 ; RV32ZVE32F-NEXT: .LBB84_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_13 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_14 ; RV32ZVE32F-NEXT: .LBB84_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_15 ; RV32ZVE32F-NEXT: .LBB84_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16 ; RV32ZVE32F-NEXT: .LBB84_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_2 ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_3 ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; 
RV32ZVE32F-NEXT: beqz a1, .LBB84_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_4 ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_5 ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_6 ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_7 ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8 ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9113,8 +9112,8 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9206,81 +9205,81 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB85_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_10 ; RV32ZVE32F-NEXT: .LBB85_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_11 ; RV32ZVE32F-NEXT: .LBB85_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; 
RV32ZVE32F-NEXT: bnez a1, .LBB85_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_12 ; RV32ZVE32F-NEXT: .LBB85_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_13 ; RV32ZVE32F-NEXT: .LBB85_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_14 ; RV32ZVE32F-NEXT: .LBB85_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_15 ; RV32ZVE32F-NEXT: .LBB85_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16 ; RV32ZVE32F-NEXT: .LBB85_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_2 ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_3 ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_4 ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_5 ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_6 ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_7 ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: 
vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8 ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9314,8 +9313,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9409,81 +9408,81 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB86_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_10 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_11 ; RV32ZVE32F-NEXT: .LBB86_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_12 ; RV32ZVE32F-NEXT: .LBB86_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_13 ; RV32ZVE32F-NEXT: .LBB86_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_14 ; RV32ZVE32F-NEXT: .LBB86_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_15 ; RV32ZVE32F-NEXT: .LBB86_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16 ; RV32ZVE32F-NEXT: .LBB86_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_2 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, 
v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_3 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_4 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_5 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_6 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_7 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9521,8 +9520,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9619,81 +9618,81 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB87_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez 
a1, .LBB87_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_10 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_11 ; RV32ZVE32F-NEXT: .LBB87_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_12 ; RV32ZVE32F-NEXT: .LBB87_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_13 ; RV32ZVE32F-NEXT: .LBB87_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_14 ; RV32ZVE32F-NEXT: .LBB87_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_15 ; RV32ZVE32F-NEXT: .LBB87_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16 ; RV32ZVE32F-NEXT: .LBB87_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_2 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_3 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_4 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_5 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_6 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; 
RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_7 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9727,8 +9726,8 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9818,81 +9817,81 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB88_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_10 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_11 ; RV32ZVE32F-NEXT: .LBB88_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_12 ; RV32ZVE32F-NEXT: .LBB88_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_13 ; RV32ZVE32F-NEXT: .LBB88_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_14 ; RV32ZVE32F-NEXT: .LBB88_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_15 ; RV32ZVE32F-NEXT: .LBB88_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16 ; RV32ZVE32F-NEXT: .LBB88_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 +; RV32ZVE32F-NEXT: vmv.x.s 
a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_2 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_3 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_4 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_5 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_6 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_7 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9926,8 +9925,8 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10018,81 +10017,81 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli 
zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB89_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_10 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_11 ; RV32ZVE32F-NEXT: .LBB89_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_12 ; RV32ZVE32F-NEXT: .LBB89_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_13 ; RV32ZVE32F-NEXT: .LBB89_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_14 ; RV32ZVE32F-NEXT: .LBB89_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_15 ; RV32ZVE32F-NEXT: .LBB89_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16 ; RV32ZVE32F-NEXT: .LBB89_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_2 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_3 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_4 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_5 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd 
fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_6 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_7 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10128,8 +10127,8 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10227,95 +10226,95 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32ZVE32F-NEXT: lw a2, 56(a1) ; RV32ZVE32F-NEXT: lw a3, 48(a1) ; RV32ZVE32F-NEXT: lw a4, 40(a1) -; RV32ZVE32F-NEXT: lw a5, 32(a1) -; RV32ZVE32F-NEXT: lw a6, 24(a1) -; RV32ZVE32F-NEXT: lw a7, 16(a1) -; RV32ZVE32F-NEXT: lw t0, 8(a1) +; RV32ZVE32F-NEXT: lw a5, 8(a1) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a1), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: lw a6, 16(a1) +; RV32ZVE32F-NEXT: lw a7, 24(a1) +; RV32ZVE32F-NEXT: lw a1, 32(a1) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB90_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_10 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_11 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4 -; 
RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_12 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_13 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_14 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_15 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_16 ; RV32ZVE32F-NEXT: .LBB90_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_2 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_3 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_4 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_5 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_6 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_7 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, 
m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_8 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10457,8 +10456,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10474,8 +10473,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_8: # %else8 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 @@ -10487,8 +10486,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_10: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 @@ -10511,8 +10510,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB91_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else20 @@ -10533,8 +10532,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB91_20: # %else26 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27 @@ -10656,11 +10655,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vslidedown.vi v10, v10, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret ; @@ -10689,8 +10688,8 @@ define 
void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10706,8 +10705,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_8: # %else8 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 @@ -10719,8 +10718,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_10: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 @@ -10743,8 +10742,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 @@ -10765,8 +10764,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_19: # %else22 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 @@ -10787,8 +10786,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB92_23: # %else26 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else28 @@ -10812,8 +10811,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_28: # %else34 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 45 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else36 @@ -10830,8 +10829,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> 
%idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_32: # %else40 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 42 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41 @@ -10844,8 +10843,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_34: # %else42 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 41 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else44 @@ -10869,8 +10868,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_39: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 37 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else52 @@ -10892,8 +10891,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_44: # %else58 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 33 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index b3011d0f01cab1..86c28247e97ef1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -401,54 +401,41 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 18 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: addi a3, a2, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v8, (a3) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 3 -; RV32-NEXT: add a3, a4, a3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v0, (a2) -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v0, v24 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vmseq.vv v7, v24, v8 ; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle64.v v8, (a2) +; 
RV32-NEXT: vle64.v v24, (a2)
 ; RV32-NEXT: vle64.v v16, (a0)
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a2, a0, 3
-; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmseq.vv v0, v16, v24
+; RV32-NEXT: vmseq.vv v0, v16, v8
 ; RV32-NEXT: addi a0, a1, 128
-; RV32-NEXT: vse64.v v8, (a0), v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vse64.v v24, (a0), v0.t
+; RV32-NEXT: vmv1r.v v0, v7
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vse64.v v8, (a1), v0.t
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 18
-; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 19f3d3ce19fa4c..93b4f7d2a9c9fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -19,9 +19,9 @@ define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
 ret <2 x half> %v
@@ -38,9 +38,9 @@ define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl)
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
@@ -61,9 +61,9 @@ define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
 ret <4 x half> %v
@@ -80,9 +80,9 @@ define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl)
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
@@ -103,9 +103,9 @@ define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
 ret <8 x half> %v
@@ -122,9 +122,9 @@ define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl)
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
@@ -135,21 +135,21 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
 ret <16 x half> %v
@@ -166,9 +166,9 @@ define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %e
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
@@ -189,9 +189,9 @@ define <2 x float> @vp_nearbyint_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
 ret <2 x float> %v
@@ -208,9 +208,9 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
@@ -231,9 +231,9 @@ define <4 x float> @vp_nearbyint_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT:
vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -250,9 +250,9 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -271,13 +271,13 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -294,9 +294,9 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -315,13 +315,13 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -338,9 +338,9 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -361,9 +361,9 @@ define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> 
%va, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -380,9 +380,9 @@ define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -393,21 +393,21 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -424,9 +424,9 @@ define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -437,21 +437,21 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -468,9 +468,9 @@ define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> 
@llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -481,21 +481,21 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -512,9 +512,9 @@ define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroex ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v @@ -525,21 +525,21 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -556,9 +556,9 @@ define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroex ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v @@ -569,17 +569,9 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: 
.cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vmv1r.v v25, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vmv1r.v v6, v0
 ; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v7, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
@@ -588,43 +580,36 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT: .LBB26_2:
 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
+; CHECK-NEXT: vmv1r.v v0, v6
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
 ; CHECK-NEXT: frflags a1
+; CHECK-NEXT: vmv1r.v v0, v6
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: fsflags a1
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: addi a1, a0, -16
 ; CHECK-NEXT: sltu a0, a0, a1
 ; CHECK-NEXT: addi a0, a0, -1
 ; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
 ; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
-; CHECK-NEXT: vmv.v.v v16, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
 ret <32 x double> %v
@@ -660,9 +645,9 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroex
 ; CHECK-NEXT: frflags a0
 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: ret
 %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index 5f456c7824316b..a998ebcd4a5114 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -163,12 +163,12 @@ define i32 @reduce_sum_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, 224 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -192,12 +192,12 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) { ; CHECK-NEXT: li a1, 192 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -221,10 +221,10 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -248,9 +248,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -535,12 +535,12 @@ define i32 @reduce_xor_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, 224 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -576,17 +576,17 @@ define i32 @reduce_and_16xi32_prefix2(ptr %p) { define i32 @reduce_and_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_and_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: vsetivli zero, 
8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredand.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -623,12 +623,12 @@ define i32 @reduce_or_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, 224 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -668,17 +668,17 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) { define i32 @reduce_smax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smax_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredmax.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -713,17 +713,17 @@ define i32 @reduce_smin_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smin_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredmin.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -760,12 +760,12 @@ define i32 @reduce_umax_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, 224 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: 
vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -800,17 +800,17 @@ define i32 @reduce_umin_16xi32_prefix2(ptr %p) { define i32 @reduce_umin_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_umin_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredminu.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -830,9 +830,9 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xf32_prefix2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vfredusum.vs v8, v9, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 @@ -845,17 +845,17 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { define float @reduce_fadd_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vfredusum.vs v8, v10, v8 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 9df160bf30f005..7adaaa05f9dd91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -177,8 +177,8 @@ declare float @llvm.vp.reduce.fadd.v64f32(float, <64 x float>, <64 x i1>, i32) define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB8_2 @@ 
-193,8 +193,8 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret @@ -205,8 +205,8 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB9_2 @@ -221,8 +221,8 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 7dcfb247d37cbe..a6763fa22822ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1853,9 +1853,9 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 @@ -2188,8 +2188,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) @@ -2286,9 +2286,9 @@ define double @vreduce_fminimum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle64.v v16, (a2) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vle64.v v0, (a1) @@ -2563,9 +2563,9 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: 
vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 @@ -2898,8 +2898,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) @@ -2996,9 +2996,9 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle64.v v16, (a2) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vle64.v v0, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 02a989a9699606..016f95bfef7e71 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -801,8 +801,8 @@ declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32) define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB49_2 @@ -817,8 +817,8 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> ; CHECK-NEXT: sltu a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret @@ -1750,9 +1750,9 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: lui a2, %hi(.LCPI72_0) ; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle8.v v12, (a2) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vid.v v16 @@ -1794,9 +1794,9 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: li a3, 32 -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: lui a2, %hi(.LCPI72_0) ; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle8.v v12, (a2) ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vid.v v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 6c75c9b9c29498..28ce6a12c4c89d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1540,22 +1540,21 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vslidedown.vi v0, v16, 16 ; RV32-NEXT: 
vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv4r.v v8, v0 -; RV32-NEXT: vwadd.vv v0, v24, v8 +; RV32-NEXT: vwadd.vv v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vwadd.vv v0, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v0, v8 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 @@ -1588,22 +1587,21 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: vslidedown.vi v0, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vwadd.vv v0, v24, v8 +; RV64-NEXT: vwadd.vv v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwadd.vv v0, v8, v16 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v8, v0, v8 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredsum.vs v8, v8, v16 @@ -1639,22 +1637,21 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vslidedown.vi v0, v16, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv4r.v v8, v0 -; RV32-NEXT: vwaddu.vv v0, v24, v8 +; RV32-NEXT: vwaddu.vv v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vwaddu.vv v0, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v0, v8 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 @@ -1687,22 +1684,21 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: vslidedown.vi v0, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vwaddu.vv v0, v24, v8 +; RV64-NEXT: vwaddu.vv v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwaddu.vv v0, v8, v16 -; RV64-NEXT: vsetvli zero, 
zero, e64, m8, ta, ma ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v8, v0, v8 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredsum.vs v8, v8, v16 @@ -2286,9 +2282,9 @@ define i64 @vreduce_and_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -2871,9 +2867,9 @@ define i64 @vreduce_or_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -4074,9 +4070,9 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -4659,9 +4655,9 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -5244,9 +5240,9 @@ define i64 @vreduce_umin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -5829,9 +5825,9 @@ define i64 @vreduce_umax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index a1f010f98ab40b..dc0f4e74305550 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -24,8 +24,8 @@ define zeroext i1 @vpreduce_or_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ 
-40,8 +40,8 @@ define zeroext i1 @vpreduce_xor_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -72,8 +72,8 @@ define zeroext i1 @vpreduce_or_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -88,8 +88,8 @@ define zeroext i1 @vpreduce_xor_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -120,8 +120,8 @@ define zeroext i1 @vpreduce_or_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -136,8 +136,8 @@ define zeroext i1 @vpreduce_xor_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -168,8 +168,8 @@ define zeroext i1 @vpreduce_or_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -184,8 +184,8 @@ define zeroext i1 @vpreduce_xor_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -264,8 +264,8 @@ define zeroext i1 @vpreduce_or_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_or_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -280,8 +280,8 @@ define zeroext i1 @vpreduce_xor_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_xor_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 
1 ; CHECK-NEXT: xor a0, a1, a0 @@ -296,8 +296,8 @@ define zeroext i1 @vpreduce_add_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -312,8 +312,8 @@ define zeroext i1 @vpreduce_add_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -328,8 +328,8 @@ define zeroext i1 @vpreduce_add_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -344,8 +344,8 @@ define zeroext i1 @vpreduce_add_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -360,8 +360,8 @@ define zeroext i1 @vpreduce_add_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_add_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -488,8 +488,8 @@ define zeroext i1 @vpreduce_smin_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -504,8 +504,8 @@ define zeroext i1 @vpreduce_smin_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -520,8 +520,8 @@ define zeroext i1 @vpreduce_smin_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -536,8 +536,8 @@ define zeroext i1 @vpreduce_smin_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; 
CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -552,8 +552,8 @@ define zeroext i1 @vpreduce_smin_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m ; CHECK-LABEL: vpreduce_smin_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -568,8 +568,8 @@ define zeroext i1 @vpreduce_smin_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m ; CHECK-LABEL: vpreduce_smin_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -584,8 +584,8 @@ define zeroext i1 @vpreduce_smin_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m ; CHECK-LABEL: vpreduce_smin_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -600,8 +600,8 @@ define zeroext i1 @vpreduce_umax_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -616,8 +616,8 @@ define zeroext i1 @vpreduce_umax_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -632,8 +632,8 @@ define zeroext i1 @vpreduce_umax_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -648,8 +648,8 @@ define zeroext i1 @vpreduce_umax_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -664,8 +664,8 @@ define zeroext i1 @vpreduce_umax_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m ; CHECK-LABEL: vpreduce_umax_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -680,8 +680,8 @@ define zeroext i1 @vpreduce_umax_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m ; CHECK-LABEL: vpreduce_umax_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: 
vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -696,8 +696,8 @@ define zeroext i1 @vpreduce_umax_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m ; CHECK-LABEL: vpreduce_umax_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 920d0d5fe7ba74..1f856d04ca89fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -123,15 +123,15 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -246,8 +246,8 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -286,8 +286,8 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -357,15 +357,15 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -397,15 +397,15 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 
zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -437,15 +437,15 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -477,15 +477,15 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -517,65 +517,54 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -597,17 +586,20 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v7, v24, fa5 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, 
e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index 716cf7b0f46fa5..0f587232680df6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, 
zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: 
vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 603f9397dc90f1..0fb7e6a7de5696 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vsetvli 
zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld 
fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp 
+ 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index a5adfc36887ad0..927f96b6442274 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: 
vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; 
CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi 
a1, 1 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll index a4ab67f41595d4..80561be0ca2f5f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll @@ -115,17 +115,17 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea ; CHECK-NEXT: vwaddu.vv v10, v9, v8 ; CHECK-NEXT: vminu.vv v8, v12, v13 ; CHECK-NEXT: vmaxu.vv v9, v12, v13 -; CHECK-NEXT: vsub.vv v8, v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a1, a1, a3 -; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vle8.v v12, (a1) +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vle8.v v13, (a1) +; CHECK-NEXT: vsub.vv v8, v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v14, v8 ; CHECK-NEXT: vwaddu.vv v16, v14, v10 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vminu.vv v8, v9, v12 -; CHECK-NEXT: vmaxu.vv v9, v9, v12 +; CHECK-NEXT: vminu.vv v8, v12, v13 +; CHECK-NEXT: vmaxu.vv v9, v12, v13 ; CHECK-NEXT: vsub.vv v8, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 4598bf67a23637..33e9cde4c30abb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1163,31 +1163,31 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFH-NEXT: addi a0, sp, 16 ; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH-NEXT: vslidedown.vi v7, v0, 8 +; ZVFH-NEXT: vslidedown.vi v6, v0, 8 ; ZVFH-NEXT: mv a0, a2 ; ZVFH-NEXT: bltu a2, a3, .LBB43_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: li a0, 64 ; ZVFH-NEXT: .LBB43_2: +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v6, v8, v24, v0.t +; ZVFH-NEXT: vmfeq.vv v7, v8, v24, v0.t ; ZVFH-NEXT: addi a0, a2, -64 ; ZVFH-NEXT: sltu a1, a2, a0 ; ZVFH-NEXT: addi a1, a1, -1 ; ZVFH-NEXT: and a0, a1, a0 +; ZVFH-NEXT: vmv1r.v v0, v6 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: add a1, sp, a1 +; ZVFH-NEXT: addi a1, a1, 16 +; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v24, v16, v8, v0.t +; ZVFH-NEXT: vmfeq.vv v8, v16, v24, v0.t ; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vi v6, v24, 8 -; ZVFH-NEXT: vmv.v.v v0, v6 +; ZVFH-NEXT: vslideup.vi v7, v8, 8 +; ZVFH-NEXT: vmv.v.v v0, v7 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 4 ; ZVFH-NEXT: add sp, sp, a0 @@ -2865,37 +2865,36 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v6, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB87_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB87_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v6, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v7, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v6, v24, 2 -; CHECK-NEXT: vmv1r.v 
v0, v6 +; CHECK-NEXT: vslideup.vi v7, v8, 2 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 21bbca00921d6b..5f3847e085055b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -611,10 +611,10 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: vle8.v v8, (a2) ; CHECK-NEXT: addi a2, a3, -128 ; CHECK-NEXT: sltu a4, a3, a2 -; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a2, a4, a2 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v6, v16, v8, v0.t @@ -622,7 +622,6 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -631,6 +630,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v6 @@ -660,8 +660,8 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB52_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -689,8 +689,8 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB53_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -1264,31 +1264,31 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-NEXT: vslidedown.vi v6, v0, 4 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a3, .LBB99_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v6, v8, v24, v0.t +; CHECK-NEXT: vmseq.vv v7, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded 
Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v6, v24, 4 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vslideup.vi v7, v8, 4 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -1301,8 +1301,8 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB100_2 @@ -1315,8 +1315,8 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v25, v8, 4 @@ -1331,8 +1331,8 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB101_2 @@ -1345,8 +1345,8 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v25, v8, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll index 52596d8892411a..d1980ee3b0a6fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -151,8 +151,8 @@ declare <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 @@ -167,8 +167,8 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
; CHECK-NEXT: vsext.vf2 v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll index 609b4e98248926..925366e8b1d500 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll @@ -33,8 +33,8 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; VLS-LABEL: concat_4xv2i32: ; VLS: # %bb.0: ; VLS-NEXT: vmv1r.v v13, v10 -; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vmv1r.v v12, v8 +; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vslideup.vi v13, v11, 2 ; VLS-NEXT: vslideup.vi v12, v9, 2 ; VLS-NEXT: vmv2r.v v8, v12 @@ -147,8 +147,8 @@ define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; VLS-NEXT: vmv1r.v v19, v14 ; VLS-NEXT: vmv1r.v v18, v12 ; VLS-NEXT: vmv1r.v v17, v10 -; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vmv1r.v v16, v8 +; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vslideup.vi v19, v15, 2 ; VLS-NEXT: vslideup.vi v18, v13, 2 ; VLS-NEXT: vslideup.vi v17, v11, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll index 8499086994bc01..d461fa8378cffc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -164,11 +164,10 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64> define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) { ; CHECK-LABEL: m2_splat_into_slide_two_source: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vrgather.vi v12, v8, 0 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vrgather.vi v12, v8, 0 ; CHECK-NEXT: vslideup.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index 47d7baade8b49e..d70ed2fb0e2665 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -101,10 +101,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v32i1: ; NO-ZVBB: # %bb.0: ; NO-ZVBB-NEXT: li a0, 32 +; NO-ZVBB-NEXT: lui a1, %hi(.LCPI4_0) +; NO-ZVBB-NEXT: addi a1, a1, %lo(.LCPI4_0) ; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; NO-ZVBB-NEXT: lui a0, %hi(.LCPI4_0) -; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI4_0) -; NO-ZVBB-NEXT: vle8.v v8, (a0) +; NO-ZVBB-NEXT: vle8.v v8, (a1) ; NO-ZVBB-NEXT: vmv.v.i v10, 0 ; NO-ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 ; NO-ZVBB-NEXT: vrgather.vv v12, v10, v8 @@ -124,10 +124,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v64i1: ; NO-ZVBB: # %bb.0: ; NO-ZVBB-NEXT: li a0, 64 +; NO-ZVBB-NEXT: lui a1, %hi(.LCPI5_0) +; NO-ZVBB-NEXT: addi a1, a1, %lo(.LCPI5_0) ; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; NO-ZVBB-NEXT: lui a0, %hi(.LCPI5_0) -; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI5_0) -; NO-ZVBB-NEXT: vle8.v v8, (a0) +; NO-ZVBB-NEXT: vle8.v v8, (a1) ; NO-ZVBB-NEXT: vmv.v.i v12, 0 ; NO-ZVBB-NEXT: vmerge.vim v12, v12, 1, v0 ; NO-ZVBB-NEXT: vrgather.vv v16, v12, v8 @@ 
-147,10 +147,10 @@ define <128 x i1> @reverse_v128i1(<128 x i1> %a) { ; CHECK-LABEL: reverse_v128i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI6_0) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vrgather.vv v24, v16, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index 038fead011d899..8c74d8f538525c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -19,8 +19,8 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn2.v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -46,13 +46,12 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn2.v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -74,8 +73,9 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn2.v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret @@ -98,8 +98,8 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -121,8 +121,9 @@ define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) { define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: trn2.v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret @@ -145,8 +146,9 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn2.v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: 
vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret @@ -167,8 +169,9 @@ define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) { define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK-LABEL: trn2.v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret @@ -189,8 +192,9 @@ define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) { define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK-LABEL: trn2.v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret @@ -213,8 +217,9 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn2.v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret @@ -235,8 +240,9 @@ define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) { define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK-LABEL: trn2.v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret @@ -259,8 +265,9 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn2.v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret @@ -283,8 +290,8 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index 5e93fdfc7a652d..bf0eab77d0ac83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -390,8 +390,8 @@ declare <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -404,8 +404,8 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index 0e6b03bf16323d..0e1105848440ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -75,9 +75,9 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) { ; CHECK-NO-MISALIGN: # %bb.0: ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0) -; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16 -; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) ; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 +; CHECK-NO-MISALIGN-NEXT: addi a3, a0, 16 +; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a3) ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24 ; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0) ; CHECK-NO-MISALIGN-NEXT: vle8.v v11, (a2) @@ -186,9 +186,9 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 6 -; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: addi a2, a0, 2 +; CHECK-NEXT: addi a3, a0, 6 +; CHECK-NEXT: vle16.v v10, (a3) ; CHECK-NEXT: addi a0, a0, 8 ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vle16.v v11, (a2) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 6a8d2008de74dd..5e64e9fbc1a2f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -480,14 +480,14 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x ; CHECK-NEXT: addi a5, a2, -16 ; CHECK-NEXT: sltu a2, a2, a5 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v9, 2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vlse64.v v16, (a4), a1, v0.t -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl) @@ -555,13 +555,13 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: li a4, 16 ; CHECK-RV32-NEXT: .LBB42_6: ; CHECK-RV32-NEXT: mul a5, a4, a2 -; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, 
v0.t -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vse64.v v8, (a0) @@ -605,13 +605,13 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB42_6: ; CHECK-RV64-NEXT: mul a5, a3, a2 -; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll index dee422a4c17d1a..35f123f1157f22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -420,9 +420,9 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid ; CHECK-NEXT: addi a3, a2, -16 ; CHECK-NEXT: sltu a2, a2, a3 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 9fa8ab39723f74..7513d31b54bd1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -54,8 +54,8 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK-LABEL: vtrunc_v128i7_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 8 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a1, .LBB4_2 @@ -68,8 +68,8 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -243,75 +243,67 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v5, v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v0, 4 +; CHECK-NEXT: vslidedown.vi v25, v0, 8 ; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: 
addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v5, 4 -; CHECK-NEXT: addi a2, a1, 640 +; CHECK-NEXT: vslidedown.vi v27, v25, 4 +; CHECK-NEXT: addi a3, a1, 640 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: vle64.v v8, (a3) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a7, -64 -; CHECK-NEXT: sltu a3, a7, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a2 -; CHECK-NEXT: addi a2, a4, -32 -; CHECK-NEXT: sltu a3, a4, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a2 -; CHECK-NEXT: addi a2, a3, -16 -; CHECK-NEXT: sltu a5, a3, a2 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a2, a5, a2 ; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a2, a2, a5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: addi a3, a7, -64 +; CHECK-NEXT: sltu a4, a7, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: addi a3, a4, -32 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a3, a5, a3 +; CHECK-NEXT: addi a5, a3, -16 +; CHECK-NEXT: sltu a6, a3, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 24 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v7, 4 ; CHECK-NEXT: bltu a3, a2, .LBB16_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: li a3, 64 -; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: vle64.v v16, (a5) ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v28, v26, 2 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mul a3, a3, a6 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_4 +; CHECK-NEXT: bltu a7, a5, .LBB16_4 ; 
CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a6, 64 ; CHECK-NEXT: .LBB16_4: @@ -332,10 +324,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu t1, a6, t0 ; CHECK-NEXT: addi t1, t1, -1 ; CHECK-NEXT: and t0, t1, t0 -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: addi t0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: li t2, 48 +; CHECK-NEXT: mul t1, t1, t2 +; CHECK-NEXT: add t1, sp, t1 +; CHECK-NEXT: addi t1, t1, 16 +; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 4 @@ -346,19 +342,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v20, v5, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v27, v25, 2 ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: slli a5, a5, 3 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 @@ -371,13 +369,20 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: li a5, 32 ; CHECK-NEXT: .LBB16_8: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a5, -16 ; CHECK-NEXT: sltu a5, a5, a1 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a1, a5, a1 +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: bltu a4, a2, .LBB16_10 ; CHECK-NEXT: # %bb.9: @@ -385,8 +390,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: .LBB16_10: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v6, v7, 2 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: mv a1, a7 @@ -401,13 +411,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, 
v8 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 56 ; CHECK-NEXT: mul a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 56 @@ -446,19 +456,18 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu a1, a1, a4 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: bltu a7, a2, .LBB16_14 ; CHECK-NEXT: # %bb.13: ; CHECK-NEXT: li a7, 16 ; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 40 @@ -466,6 +475,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v8, 16 @@ -509,8 +519,8 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK-LABEL: vtrunc_v32i32_v32i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB17_2 @@ -523,8 +533,8 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll index 698c48bc55650c..e28d55f46abcbb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -390,8 +390,8 @@ declare <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -404,8 +404,8 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; 
CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 2c62cbd583d00c..5601bd5ee7a3ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -377,8 +377,8 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -416,8 +416,8 @@ define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -1348,8 +1348,8 @@ declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1365,15 +1365,15 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1386,8 +1386,8 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1468,8 +1468,8 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1479,8 +1479,8 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 
x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index 507cf5cc6b80cc..d414be76672ab0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1140,15 +1140,16 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero ; RV32-LABEL: vand_vx_v11i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v16, v0 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: lui a3, 341 ; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vmerge.vxm v24, v24, a0, v0 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: ret ; @@ -1167,10 +1168,11 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext % ; RV32-LABEL: vand_vx_v11i64_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a3, 32 +; RV32-NEXT: lui a4, 341 +; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: lui a3, 341 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index 01b07b4081e6d9..77a095303675f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -298,37 +298,46 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: 
vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index f32e2bbf37946c..ae3dce497c6d07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.fabs.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index 0574773fb2fd93..e2e48cee3eacc2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -862,51 +862,51 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: bltu a4, a1, .LBB50_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: addi a1, sp, 16 +; 
CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -954,25 +954,25 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v0, v8, v24 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: vmv.v.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index ffa88e28d7dc86..c83a298cb501ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -390,37 +390,46 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index 17f851e172f814..60dbededb90a5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -390,37 +390,46 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: 
vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index 288efb0f1fc27c..6c695b43d2718e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -626,51 +626,51 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: bltu a4, a1, .LBB50_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; 
CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -718,25 +718,25 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v0, v8, v24 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: vmv.v.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index c36ec25c04f93f..fbc4c56a911340 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.fneg.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 
x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index 6004eb4fe217a3..988b200ae53656 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zero ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll index dd3a50cfd77377..05c7bd990642c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll @@ -105,13 +105,12 @@ define <64 x float> @vfwadd_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwadd.vv v8, v16, v24 +; CHECK-NEXT: vfwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwadd_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwadd.vv v8, v16, v24 +; CHECK-NEXT: vfwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll index 7eaa1856ce2218..5a57801d33b40d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll @@ -105,13 +105,12 @@ define <64 x float> @vfwmul_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwmul.vv v8, v16, v24 +; CHECK-NEXT: vfwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 
; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwmul_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwmul.vv v8, v16, v24 +; CHECK-NEXT: vfwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll index 8cf7c5f1758654..2c706cad9742ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll @@ -105,13 +105,12 @@ define <64 x float> @vfwsub_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwsub.vv v8, v16, v24 +; CHECK-NEXT: vfwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwsub_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwsub.vv v8, v16, v24 +; CHECK-NEXT: vfwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 3db44e87109bd4..9789afda9344ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -282,8 +282,8 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -325,8 +325,8 @@ define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, 
ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1021,8 +1021,8 @@ declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1038,15 +1038,15 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmax.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmax_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1060,8 +1060,8 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index c97c2232715f56..36b0a4642b6169 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -281,8 +281,8 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -324,8 +324,8 @@ define <256 x i8> @vmaxu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1020,8 +1020,8 @@ declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1037,15 +1037,15 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: 
and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmaxu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1059,8 +1059,8 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index eaa19110a2a28c..adb0a30f34d35a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -282,8 +282,8 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -325,8 +325,8 @@ define <256 x i8> @vmin_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1021,8 +1021,8 @@ declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1038,15 +1038,15 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmin.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmin_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1060,8 +1060,8 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: 
ret %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 48175e5b905ba5..671ce82d4ae795 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -281,8 +281,8 @@ define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -324,8 +324,8 @@ define <256 x i8> @vminu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1020,8 +1020,8 @@ declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1037,15 +1037,15 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vminu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vminu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1059,8 +1059,8 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vminu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index a13f1eed8efb1d..028fb9a626f02d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -297,10 +297,10 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e64, m8, 
ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 @@ -1882,10 +1882,10 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: vmv8r.v v8, v24 @@ -1904,9 +1904,9 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret @@ -1933,10 +1933,10 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -1961,9 +1961,9 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -1991,10 +1991,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2020,9 +2020,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2051,10 +2051,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; 
RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2077,10 +2077,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2109,10 +2109,10 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2137,9 +2137,9 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2167,10 +2167,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2196,9 +2196,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2227,10 +2227,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2253,10 +2253,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 
16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2270,8 +2270,8 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB93_2 @@ -2284,10 +2284,10 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2312,9 +2312,9 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2327,8 +2327,8 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB94_2 @@ -2341,10 +2341,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2370,9 +2370,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2386,8 +2386,8 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, 
.LBB95_2 @@ -2400,10 +2400,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2429,9 +2429,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2457,9 +2457,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a3, a1, a2 ; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 @@ -2467,8 +2467,8 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB96_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2488,9 +2488,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 9ef89352e65e55..f204d812c14f68 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -377,9 +377,9 @@ define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -405,9 +405,9 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: sltu a3, a3, a4 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a4 -; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: addi a3, a2, -32 @@ -419,17 +419,17 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: 
# %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a1), v0.t ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 466448a7a05a21..9f0561b394b819 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1193,17 +1193,17 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -1229,9 +1229,9 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index cd9a38d5167d5a..0c180cd148b813 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1685,10 +1685,10 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: ret @@ -1718,12 +1718,12 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV64-NEXT: addi a0, a2, -16 ; RV64-NEXT: sltu a1, a2, a0 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: 
vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -1753,10 +1753,10 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1766,51 +1766,44 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 16 +; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vmv4r.v v0, v24 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v0 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsext.vf2 v0, v24 +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsext.vf2 v24, v0 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB80_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB80_2: +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vslidedown.vi v0, v7, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1838,10 +1831,10 @@ define void 
@vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1878,21 +1871,21 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB81_2: +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 @@ -1925,10 +1918,10 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1965,21 +1958,21 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB82_2: +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index c0aa735614b21a..f396790f4f1783 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -295,9 +295,9 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 291629de6dcfab..df2c83028e5dff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -386,8 +386,8 @@ define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -425,8 +425,8 @@ define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -442,8 +442,8 @@ define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1361,8 +1361,8 @@ declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1378,15 +1378,15 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1399,8 +1399,8 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, 
a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1462,8 +1462,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1473,8 +1473,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1491,8 +1491,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1502,8 +1502,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index d38ee1148e894e..f50dadf0199105 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -382,8 +382,8 @@ define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -421,8 +421,8 @@ define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -438,8 +438,8 @@ define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; 
CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1357,8 +1357,8 @@ declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsaddu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1374,15 +1374,15 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsaddu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1395,8 +1395,8 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1458,8 +1458,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1469,8 +1469,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1487,8 +1487,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1498,8 +1498,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: 
vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll index 12d96fbfb88d63..4f533f2055bf34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll @@ -24,17 +24,17 @@ define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: addi a4, a3, 128 +; CHECK-NEXT: addi a5, a3, 384 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: addi a2, a3, 128 -; CHECK-NEXT: addi a4, a3, 384 -; CHECK-NEXT: vle8.v v8, (a4) -; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: vle8.v v8, (a5) +; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: mul a2, a2, a5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, a1, 128 ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -48,10 +48,10 @@ define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v8, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v24, (a2) +; CHECK-NEXT: vle8.v v24, (a4) ; CHECK-NEXT: vle8.v v0, (a3) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index d05f580ea7d222..0a2ed3eb1ffbf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -175,19 +175,18 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: addi a0, a3, -128 ; CHECK-NEXT: sltu a4, a3, a0 -; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: vle8.v v0, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a0, a4, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0 ; CHECK-NEXT: bltu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -196,6 +195,7 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -221,39 
+221,39 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v24, (a1) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 @@ -437,12 +437,12 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -456,15 +456,41 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: 
vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v @@ -594,12 +620,12 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> % ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 7dcd4c41998279..ed2ed2a2ebfaa0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -5,8 +5,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -35,8 +35,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -73,8 +73,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -104,8 +104,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 
6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -144,8 +144,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vi_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -175,8 +175,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vi_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -214,8 +214,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -244,8 +244,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -282,8 +282,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -313,8 +313,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -353,8 +353,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vfpzero_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -384,8 +384,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vfpzero_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 2caa2ff41a7d93..b82ca70477ba36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -399,8 +399,8 @@ define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 
128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -440,8 +440,8 @@ define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -458,8 +458,8 @@ define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1401,8 +1401,8 @@ declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vssub_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1418,15 +1418,15 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vssub_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1440,8 +1440,8 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1504,8 +1504,8 @@ define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1516,8 +1516,8 @@ define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1534,8 +1534,8 @@ define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1546,8 +1546,8 @@ define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 6313f31bc1a615..6d8ed563f02bd2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -394,8 +394,8 @@ define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -435,8 +435,8 @@ define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -453,8 +453,8 @@ define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1396,8 +1396,8 @@ declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vssubu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1413,15 +1413,15 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vssubu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1435,8 +1435,8 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1499,8 +1499,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1511,8 +1511,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1529,8 +1529,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1541,8 +1541,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index a4a5917fd4f9e6..d6ca6c5a4b83de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwadd_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; 
CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwadd_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwadd_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 @@ -768,8 +765,8 @@ define <4 x i32> @vwadd_vx_v4i32_i32(ptr %x, ptr %y) { define <2 x i64> @vwadd_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lb a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -796,8 +793,8 @@ define <2 x i64> @vwadd_vx_v2i64_i8(ptr %x, ptr %y) nounwind { define <2 x i64> @vwadd_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lh a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -824,8 +821,8 @@ define <2 x i64> @vwadd_vx_v2i64_i16(ptr %x, ptr %y) nounwind { define <2 x i64> @vwadd_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -853,9 +850,9 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index bc0bf5dd76ad45..61378a424ecba8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwaddu_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, 
a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwaddu_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwaddu_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 @@ -769,8 +766,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i8: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -801,8 +798,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i16: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -833,8 +830,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -865,9 +862,9 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index 
2abd34f01c14c0..93927e10e607e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -289,13 +289,12 @@ define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 @@ -337,13 +336,12 @@ define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 @@ -384,13 +382,12 @@ define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 @@ -883,9 +880,9 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll index 921037db2ea99e..ee114350a43239 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -281,13 +281,12 @@ define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 @@ -329,13 +328,12 @@ define <64 x i32> @vwmulsu_v64i32(ptr 
%x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 @@ -376,13 +374,12 @@ define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 @@ -793,8 +790,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -827,8 +824,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -861,8 +858,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll index b97c9654ad3cb4..17a76ae5e7f75e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll @@ -265,13 +265,12 @@ define <128 x i16> @vwmulu_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 @@ -313,13 +312,12 @@ define <64 x i32> @vwmulu_v64i32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: 
vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 @@ -360,13 +358,12 @@ define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index 154093d759d6dd..a2675d59ade938 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwsub_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwsub_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwsub_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -715,8 +712,8 @@ define <8 x i16> @vwsub_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsub_vx_v4i32_i8(ptr %x, ptr %y) { ; 
CHECK-LABEL: vwsub_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: lb a1, 0(a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vwsub.vv v8, v10, v9 @@ -769,8 +766,8 @@ define <4 x i32> @vwsub_vx_v4i32_i32(ptr %x, ptr %y) { define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lb a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -779,8 +776,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsub_vx_v2i64_i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lb a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsub.vv v8, v10, v9 @@ -798,8 +795,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lh a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -808,8 +805,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsub_vx_v2i64_i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lh a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsub.vv v8, v10, v9 @@ -827,8 +824,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { define <2 x i64> @vwsub_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -856,9 +853,9 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index a084b5383b4030..1a9e3aac003416 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwsubu_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwsubu_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -715,8 +712,8 @@ define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vwsubu.vv v8, v10, v9 @@ -770,8 +767,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i8: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -783,8 +780,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsubu.vv v8, v10, v9 @@ -803,8 +800,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i16: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -816,8 +813,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lhu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsubu.vv v8, v10, v9 @@ -836,8 +833,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: 
vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -868,9 +865,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll index f4d679cd57cac9..df90dae379c06c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -151,8 +151,8 @@ declare <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 @@ -167,8 +167,8 @@ define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index d464b491bbbe2f..26a3e053bf7aad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -135,16 +135,16 @@ declare @llvm.vp.floor.nxv8f16(, @vp_floor_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -179,16 +179,16 @@ declare @llvm.vp.floor.nxv16f16(, @vp_floor_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -223,16 +223,16 @@ declare @llvm.vp.floor.nxv32f16(, @vp_floor_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32f16: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -359,8 +359,8 @@ define @vp_floor_nxv4f32( %va, @vp_floor_nxv8f32( %va, @vp_floor_nxv16f32( %va, @llvm.vp.floor.nxv2f64(, @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -569,16 +569,16 @@ declare @llvm.vp.floor.nxv4f64(, @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -613,16 +613,16 @@ declare @llvm.vp.floor.nxv7f64(, @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -657,16 +657,16 @@ declare @llvm.vp.floor.nxv8f64(, @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: 
vfcvt.f.x.v v24, v24, v0.t @@ -705,66 +705,56 @@ define @vp_floor_nxv16f64( %va, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_vv_nxv4f16( %va, @vfmax_vv_nxv8f16( %va, @vfmax_vv_nxv16f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: li a4, 24 @@ -647,11 +631,12 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 @@ -683,13 +668,13 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 @@ -1012,77 +997,99 @@ define @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: sub a0, a2, a1 -; CHECK-NEXT: sltu a3, a2, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 @@ -1185,12 +1206,12 @@ define @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; 
CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 48baa12aa2e59b..e9425939249878 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -214,10 +214,7 @@ define @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_vv_nxv4f16( %va, @vfmin_vv_nxv8f16( %va, @vfmin_vv_nxv16f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: li a4, 24 @@ -647,11 +631,12 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 @@ -683,13 +668,13 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 @@ -1012,77 +997,99 @@ define @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, 
a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: sub a0, a2, a1 -; CHECK-NEXT: sltu a3, a2, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 @@ -1185,12 +1206,12 @@ define @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index f90237b8d7e95d..f88a9b3081a1a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -19,9 +19,9 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -42,9 +42,9 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -65,9 +65,9 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -88,9 +88,9 @@ define @nearbyint_nxv8f16( %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -111,9 +111,9 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f16( %v, metadata !"round.dynamic", metadata 
!"fpexcept.strict") ret %r @@ -134,9 +134,9 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv32f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -157,9 +157,9 @@ define @nearbyint_nxv1f32( %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -180,9 +180,9 @@ define @nearbyint_nxv2f32( %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -203,9 +203,9 @@ define @nearbyint_nxv4f32( %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -226,9 +226,9 @@ define @nearbyint_nxv8f32( %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -249,9 +249,9 @@ define @nearbyint_nxv16f32( %v) stric ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -272,9 +272,9 @@ define @nearbyint_nxv1f64( %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -295,9 +295,9 @@ define @nearbyint_nxv2f64( %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call 
@llvm.experimental.constrained.nearbyint.nxv2f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -318,9 +318,9 @@ define @nearbyint_nxv4f64( %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -341,9 +341,9 @@ define @nearbyint_nxv8f64( %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 9aa356b9b65e0b..9e14852305caa1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -15,9 +15,9 @@ define @nearbyint_nxv1f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1f16( %x) ret %a @@ -35,9 +35,9 @@ define @nearbyint_nxv2f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f16( %x) ret %a @@ -55,9 +55,9 @@ define @nearbyint_nxv4f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f16( %x) ret %a @@ -75,9 +75,9 @@ define @nearbyint_nxv8f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f16( %x) ret %a @@ -95,9 +95,9 @@ define @nearbyint_nxv16f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16f16( %x) ret %a @@ -115,9 +115,9 @@ define @nearbyint_nxv32f16( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv32f16( %x) ret %a @@ -135,9 +135,9 @@ define @nearbyint_nxv1f32( %x) { ; CHECK-NEXT: frflags a0 
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1f32( %x) ret %a @@ -155,9 +155,9 @@ define @nearbyint_nxv2f32( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f32( %x) ret %a @@ -175,9 +175,9 @@ define @nearbyint_nxv4f32( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f32( %x) ret %a @@ -195,9 +195,9 @@ define @nearbyint_nxv8f32( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f32( %x) ret %a @@ -215,9 +215,9 @@ define @nearbyint_nxv16f32( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16f32( %x) ret %a @@ -235,9 +235,9 @@ define @nearbyint_nxv1f64( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1f64( %x) ret %a @@ -255,9 +255,9 @@ define @nearbyint_nxv2f64( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f64( %x) ret %a @@ -275,9 +275,9 @@ define @nearbyint_nxv4f64( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f64( %x) ret %a @@ -295,9 +295,9 @@ define @nearbyint_nxv8f64( %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index bb28ff5c6dc4fd..aa845bd8bb0b7b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -448,8 +448,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; 
CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -467,8 +467,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -609,8 +609,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -628,8 +628,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -780,8 +780,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -799,8 +799,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1397,8 +1397,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1422,8 +1422,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1440,11 +1440,11 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 
4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1454,8 +1454,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1473,8 +1473,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1485,18 +1485,18 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -1710,8 +1710,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1735,8 +1735,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1753,11 +1753,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1767,8 +1767,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; 
CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1786,8 +1786,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1798,18 +1798,18 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -2045,8 +2045,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2070,8 +2070,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2088,11 +2088,11 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2102,8 +2102,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -2121,8 +2121,8 @@ define <8 x i16> 
@ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -2133,18 +2133,18 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 @@ -2279,9 +2279,9 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 @@ -2412,9 +2412,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: snez a1, a1 @@ -2524,9 +2524,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, s1 @@ -2686,9 +2686,9 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 @@ -2819,9 +2819,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: snez a1, a1 @@ -2931,9 +2931,9 @@ define <2 x i64> 
@ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, s1 @@ -3819,8 +3819,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3838,8 +3838,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3978,8 +3978,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3997,8 +3997,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4148,8 +4148,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4167,8 +4167,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4753,8 +4753,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4778,8 +4778,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call 
__extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4796,11 +4796,11 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4810,8 +4810,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4829,8 +4829,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4841,18 +4841,18 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -5064,8 +5064,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5089,8 +5089,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5107,11 +5107,11 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; 
CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5121,8 +5121,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5140,8 +5140,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5152,18 +5152,18 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -5398,8 +5398,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5423,8 +5423,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5441,11 +5441,11 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi 
v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5455,8 +5455,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5474,8 +5474,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5486,18 +5486,18 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 @@ -5633,9 +5633,9 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 @@ -5766,9 +5766,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti @@ -5867,9 +5867,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, a1 @@ -6019,9 +6019,9 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: 
vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 @@ -6152,9 +6152,9 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti @@ -6253,9 +6253,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll index 8f36aad8172744..c45af61ced94fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -163,12 +163,11 @@ define @test_signed_v4f64_v4i16( %f) { ; CHECK-NEXT: vfmin.vf v12, v12, fa4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i16( %f) ret %x @@ -186,12 +185,11 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-NEXT: vfmin.vf v16, v16, fa4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) ret %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index f9f085dcc16143..c44bf087fce852 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -960,10 +960,10 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @llrint_nxv16i64_nxv16f32( %x, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll index 9fa8807ed4add9..c9f91bf9def2ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll @@ -132,8 +132,8 @@ define @lrint_nxv16f32( %x, @intrinsic_viota_mask_m_nxv1i8_nxv1i1( ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, 
ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: viota.m v8, v9, v0.t ; CHECK-NEXT: ret entry: @@ -1313,8 +1313,8 @@ define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmsbf.m v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret @@ -1444,8 +1444,8 @@ define @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( %ptrs0, %ptr ; RV32-LABEL: mgather_nxv16i64: ; RV32: # %bb.0: ; RV32-NEXT: vl8re64.v v24, (a0) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: srli a2, a0, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: vslidedown.vx v7, v0, a2 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, a1, a0 @@ -1222,18 +1222,18 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vmv8r.v v16, v8 -; RV64-NEXT: vl8re64.v v8, (a1) -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t +; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vl8re64.v v8, (a1) ; RV64-NEXT: srli a1, a0, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: vslidedown.vx v7, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t @@ -2131,8 +2131,8 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t ; RV64-NEXT: vmv4r.v v8, v12 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 0e09f59b6a20fe..9bfa0f31dc3a61 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1691,15 +1691,15 @@ declare @llvm.vector.insert.nxv8p0.nxv16p0( %val0, %val1, %ptrs0, %ptrs1, %m) { ; RV32-LABEL: mscatter_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vl4re32.v v24, (a0) ; RV32-NEXT: vl4re32.v v28, (a1) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t +; RV32-NEXT: vl4re32.v v4, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a0 +; RV32-NEXT: vslidedown.vx v24, v0, a0 ; RV32-NEXT: vsetvli 
a0, zero, e64, m8, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret ; @@ -1708,25 +1708,36 @@ define void @mscatter_nxv16f64( %val0, %val0, %val0, %idxs @@ -1785,13 +1796,13 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, %val0, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 8bc2334282653f..a3ea462b6a7376 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -23,9 +23,9 @@ define @vp_nearbyint_nxv1f16( %va, @vp_nearbyint_nxv1f16( %va, @llvm.vp.nearbyint.nxv1f16( %va, %m, i32 %evl) ret %v @@ -63,9 +63,9 @@ define @vp_nearbyint_nxv1f16_unmasked( %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked: @@ -80,11 +80,11 @@ define @vp_nearbyint_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v @@ -105,9 +105,9 @@ define @vp_nearbyint_nxv2f16( %va, @vp_nearbyint_nxv2f16( %va, @llvm.vp.nearbyint.nxv2f16( %va, %m, i32 %evl) ret %v @@ -145,9 +145,9 @@ define @vp_nearbyint_nxv2f16_unmasked( %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked: @@ -162,11 +162,11 @@ define @vp_nearbyint_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v @@ -187,9 +187,9 @@ define @vp_nearbyint_nxv4f16( %va, @vp_nearbyint_nxv4f16( %va, @llvm.vp.nearbyint.nxv4f16( %va, %m, i32 %evl) ret %v @@ -229,9 +229,9 @@ define @vp_nearbyint_nxv4f16_unmasked( %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked: @@ -246,11 +246,11 @@ define @vp_nearbyint_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, 
zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v @@ -261,21 +261,21 @@ declare @llvm.vp.nearbyint.nxv8f16(, @vp_nearbyint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16: @@ -290,15 +290,15 @@ define @vp_nearbyint_nxv8f16( %va, @llvm.vp.nearbyint.nxv8f16( %va, %m, i32 %evl) ret %v @@ -315,9 +315,9 @@ define @vp_nearbyint_nxv8f16_unmasked( %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked: @@ -332,11 +332,11 @@ define @vp_nearbyint_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v @@ -347,21 +347,21 @@ declare @llvm.vp.nearbyint.nxv16f16(, < define @vp_nearbyint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16: @@ -376,15 +376,15 @@ define @vp_nearbyint_nxv16f16( %va, @llvm.vp.nearbyint.nxv16f16( %va, %m, i32 %evl) ret %v @@ -401,9 +401,9 @@ define @vp_nearbyint_nxv16f16_unmasked( ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: 
vp_nearbyint_nxv16f16_unmasked: @@ -418,11 +418,11 @@ define @vp_nearbyint_nxv16f16_unmasked( ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v @@ -433,21 +433,21 @@ declare @llvm.vp.nearbyint.nxv32f16(, < define @vp_nearbyint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16: @@ -468,20 +468,20 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16_unmasked( ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: @@ -555,20 +555,20 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -589,11 +589,11 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -618,9 +618,9 @@ define 
@vp_nearbyint_nxv1f32( %va, @llvm.vp.nearbyint.nxv1f32( %va, %m, i32 %evl) ret %v @@ -637,9 +637,9 @@ define @vp_nearbyint_nxv1f32_unmasked( ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v @@ -660,9 +660,9 @@ define @vp_nearbyint_nxv2f32( %va, @llvm.vp.nearbyint.nxv2f32( %va, %m, i32 %evl) ret %v @@ -679,9 +679,9 @@ define @vp_nearbyint_nxv2f32_unmasked( ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v @@ -700,13 +700,13 @@ define @vp_nearbyint_nxv4f32( %va, @llvm.vp.nearbyint.nxv4f32( %va, %m, i32 %evl) ret %v @@ -723,9 +723,9 @@ define @vp_nearbyint_nxv4f32_unmasked( ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v @@ -744,13 +744,13 @@ define @vp_nearbyint_nxv8f32( %va, @llvm.vp.nearbyint.nxv8f32( %va, %m, i32 %evl) ret %v @@ -767,9 +767,9 @@ define @vp_nearbyint_nxv8f32_unmasked( ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v @@ -788,13 +788,13 @@ define @vp_nearbyint_nxv16f32( %va, < ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f32( %va, %m, i32 %evl) ret %v @@ -811,9 +811,9 @@ define @vp_nearbyint_nxv16f32_unmasked( @llvm.vp.nearbyint.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v @@ -834,9 +834,9 @@ define @vp_nearbyint_nxv1f64( %va, @llvm.vp.nearbyint.nxv1f64( %va, %m, i32 %evl) ret %v @@ -853,9 +853,9 @@ define @vp_nearbyint_nxv1f64_unmasked( @llvm.vp.nearbyint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v @@ -866,21 +866,21 @@ declare @llvm.vp.nearbyint.nxv2f64(, define @vp_nearbyint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, 
e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f64( %va, %m, i32 %evl) ret %v @@ -897,9 +897,9 @@ define @vp_nearbyint_nxv2f64_unmasked( @llvm.vp.nearbyint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v @@ -910,21 +910,21 @@ declare @llvm.vp.nearbyint.nxv4f64(, define @vp_nearbyint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f64( %va, %m, i32 %evl) ret %v @@ -941,9 +941,9 @@ define @vp_nearbyint_nxv4f64_unmasked( @llvm.vp.nearbyint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v @@ -954,21 +954,21 @@ declare @llvm.vp.nearbyint.nxv7f64(, define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv7f64( %va, %m, i32 %evl) ret %v @@ -985,9 +985,9 @@ define @vp_nearbyint_nxv7f64_unmasked( @llvm.vp.nearbyint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v @@ -998,21 +998,21 @@ declare @llvm.vp.nearbyint.nxv8f64(, define @vp_nearbyint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: 
fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f64( %va, %m, i32 %evl) ret %v @@ -1029,9 +1029,9 @@ define @vp_nearbyint_nxv8f64_unmasked( @llvm.vp.nearbyint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1043,82 +1043,47 @@ declare @llvm.vp.nearbyint.nxv16f64( @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vfabs.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: frflags a2 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, 
e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) ret %v @@ -1153,9 +1118,9 @@ define @vp_nearbyint_nxv16f64_unmasked( @llvm.vp.nearbyint.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll index d13d67fd0a8824..8bb62eaa8e9e94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll @@ -27,20 +27,18 @@ define <4 x float> @foo(ptr %0) nounwind { ; CHECK-NEXT: fsw fa0, 0(sp) ; CHECK-NEXT: addi a0, sp, 4 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 12 ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: addi a0, sp, 12 ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vle32.v v11, (a0) ; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vi v10, v9, 1 +; CHECK-NEXT: vslideup.vi v8, v11, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index f934127f978dc9..88bd92c6ec1612 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -185,8 +185,8 @@ define @vp_rint_nxv4f16( %va, @llvm.vp.rint.nxv8f16(, @vp_rint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -263,8 +263,8 @@ define @vp_rint_nxv8f16( %va, @llvm.vp.rint.nxv16f16(, @vp_rint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: 
vfcvt.f.x.v v16, v16, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -341,8 +341,8 @@ define @vp_rint_nxv16f16( %va, @llvm.vp.rint.nxv32f16(, @vp_rint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -426,46 +426,50 @@ define @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -640,8 +644,8 @@ define @vp_rint_nxv4f32( %va, @vp_rint_nxv8f32( %va, @vp_rint_nxv16f32( %va, @llvm.vp.rint.nxv2f64(, @vp_rint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -831,15 +835,15 @@ declare @llvm.vp.rint.nxv4f64(, @vp_rint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -871,15 +875,15 @@ declare @llvm.vp.rint.nxv7f64(, @vp_rint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -911,15 +915,15 @@ declare @llvm.vp.rint.nxv8f64(, @vp_rint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -955,62 +959,51 @@ define @vp_rint_nxv16f64( %va, @vp_round_nxv4f16( %va, @llvm.vp.round.nxv8f16(, @vp_round_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_round_nxv8f16( %va, @llvm.vp.round.nxv16f16(, @vp_round_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_round_nxv16f16( %va, @llvm.vp.round.nxv32f16(, @vp_round_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ 
-458,7 +458,6 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -701,8 +708,8 @@ define @vp_round_nxv4f32( %va, @vp_round_nxv8f32( %va, @vp_round_nxv16f32( %va, @llvm.vp.round.nxv2f64(, @vp_round_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.round.nxv4f64(, @vp_round_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.round.nxv7f64(, @vp_round_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.round.nxv8f64(, @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, 
%hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv4f16( %va, @llvm.vp.roundeven.nxv8f16(, @vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_roundeven_nxv8f16( %va, @llvm.vp.roundeven.nxv16f16(, < define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_roundeven_nxv16f16( %va, @llvm.vp.roundeven.nxv32f16(, < define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -458,7 +458,6 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -701,8 +708,8 @@ define @vp_roundeven_nxv4f32( %va, @vp_roundeven_nxv8f32( %va, @vp_roundeven_nxv16f32( %va, < ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -867,16 +874,16 @@ declare @llvm.vp.roundeven.nxv2f64(, define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.roundeven.nxv4f64(, define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.roundeven.nxv7f64(, define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.roundeven.nxv8f64(, define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: 
vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, 
v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 71a53c525551c5..1227e73a024325 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -204,8 +204,8 @@ define @vp_roundtozero_nxv4f16( %va, @llvm.vp.roundtozero.nxv8f16(, @vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_roundtozero_nxv8f16( %va, @llvm.vp.roundtozero.nxv16f16(, define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -433,16 +433,16 @@ declare @llvm.vp.roundtozero.nxv32f16(, define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -458,7 +458,6 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; 
ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -466,52 +465,60 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v8, v16 ; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -556,20 +563,20 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv4f32( %va, @vp_roundtozero_nxv8f32( %va, @vp_roundtozero_nxv16f32( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: 
fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -867,16 +874,16 @@ declare @llvm.vp.roundtozero.nxv2f64( define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.roundtozero.nxv4f64( define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.roundtozero.nxv7f64( define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.roundtozero.nxv8f64( define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 
0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index e73415ac0085eb..8210ea22a6ee91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -67,13 +67,13 @@ define @foo( %a, @foo( %a, @foo( %a, @vfredusum( %passthru, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vx_nxv128i8( %va, i8 %b, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -1186,8 +1186,8 @@ define @icmp_eq_vx_swap_nxv128i8( %va, i8 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -2248,28 +2248,27 @@ define @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vx_nxv32i32( %va, i32 %b, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB190_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -2344,8 +2344,8 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB191_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index ab7da9e0faf2b9..6e327457bebffc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -78,11 +78,11 @@ define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; 
CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v20, (a0) -; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv2r.v v20, v10 ; CHECK-NEXT: vmv2r.v v12, v8 -; CHECK-NEXT: vrgather.vv v8, v12, v20 +; CHECK-NEXT: vrgather.vv v8, v12, v16 ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vi v12, v12, 15 ; CHECK-NEXT: lui a0, 16 @@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v20, v12, v0.t ; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -329,18 +329,18 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: v8i32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> ret <16 x i32> %v16i32 @@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -638,18 +638,18 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: v8f32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> ret <16 x float> %v16f32 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 8a297db7a3b8fa..d1c98f828e76df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -4873,8 
+4873,8 @@ define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i ; CHECK-NEXT: .LBB102_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v9, (a0), v0.t @@ -4914,8 +4914,8 @@ define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zer ; CHECK-NEXT: .LBB103_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v9, (a0), v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index b7fe722958bfb8..9d0234d2ec2fbc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -33,13 +33,13 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -52,13 +52,13 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,14 +70,14 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -115,13 +115,13 @@ define @vec_nxv4i32( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, 
a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv4i32( %x, %y) ret %tmp @@ -134,13 +134,13 @@ define @vec_nxv8i16( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv8i16( %x, %y) ret %tmp @@ -152,14 +152,14 @@ define @vec_nxv16i8( %x, ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv16i8( %x, %y) ret %tmp diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index eff8c26d4d0616..b3150ecea6c0ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -562,8 +562,8 @@ define @add_stepvector_nxv16i64() { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vadd.vv v8, v8, v8 @@ -597,8 +597,8 @@ define @mul_stepvector_nxv16i64() { ; RV32-NEXT: slli a1, a0, 1 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: li a0, 3 @@ -686,8 +686,8 @@ define @shl_stepvector_nxv16i64() { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vsll.vi v8, v8, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0e2105d5cba861..4d3bced0bcb50f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -588,15 +588,15 @@ define 
@strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, %mask, i32 %evl) @@ -697,10 +697,10 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 sig ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: mul a4, a4, a1 -; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: srli a3, a3, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret @@ -567,36 +567,36 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t ; CHECK-NEXT: sub a0, a5, a4 ; CHECK-NEXT: sltu t0, a5, a0 ; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: and a0, t0, a0 -; CHECK-NEXT: mul a7, a7, a2 -; CHECK-NEXT: add a7, a1, a7 -; CHECK-NEXT: srli t0, a4, 3 +; CHECK-NEXT: and t0, t0, a0 +; CHECK-NEXT: mul a0, a7, a2 +; CHECK-NEXT: add a7, a1, a0 +; CHECK-NEXT: srli a0, a4, 3 ; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a0 ; CHECK-NEXT: sub a0, a3, a6 ; CHECK-NEXT: sltu a3, a3, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t ; CHECK-NEXT: bltu a0, a4, .LBB43_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB43_6: ; CHECK-NEXT: mul a3, a5, a2 -; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: srli a4, a4, 2 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v24, a4 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 4b5e737d22eb83..ede395f4df8e17 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -580,8 +580,8 @@ define @vadd_vi_nxv128i8( %va, @llvm.vp.add.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1359,8 +1359,8 @@ define @vadd_vi_nxv32i32( %va, @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) @@ -1415,8 +1415,8 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, @vadd_vi_nxv32i32_evl_nx16( %va, < ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index 939a45e15c1037..6e34d59a2d9894 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -255,9 +255,9 @@ define @vfsgnj_vv_nxv32f16( %va, @vfsgnj_vv_nxv32f16( %va, @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll index 6f06d8e570de0b..e59a9174b03d94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll @@ -44,8 +44,8 @@ define iXLen @intrinsic_vcpop_mask_m_nxv1i1( %0, %0, %0, %0, %0, %0, %0, , <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 8f4ff37fffb02a..f0f847c61f3b01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -110,23 +110,22 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v16, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v0, v16 +; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v16, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -134,34 +133,47 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v24, v8, v16 +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vrgather.vv v8, v0, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vadd.vi v8, v16, 1 -; CHECK-NEXT: vrgather.vv v16, v0, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v16, v0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v24, v0, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v28, v8 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v8 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v8 ; CHECK-NEXT: vmv8r.v v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 7797577362c938..bcb008857ad320 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -8,14 +8,15 @@ define {, } @vector_deinterleave_nxv16i1_nxv ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a0 +; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 ; CHECK-NEXT: vnsrl.wi v10, v12, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 @@ -90,25 +91,38 @@ declare {, } @llvm.vector.deinterleave2.nxv4 define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v16, 
v8, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vnsrl.wi v12, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v7, v24, 0 +; CHECK-NEXT: vmsne.vi v7, v8, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 8 -; CHECK-NEXT: vnsrl.wi v28, v8, 8 +; CHECK-NEXT: vnsrl.wi v0, v16, 8 +; CHECK-NEXT: vnsrl.wi v4, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v9, v24, 0 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmsne.vi v9, v0, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 7ade47e60bc696..5ebf63f0a4411e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -107,14 +107,14 @@ define void @vector_interleave_store_nxv16i64_nxv8i64( %a, %a, @vector_interleave_nxv4i64_nxv2i64( ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -137,9 +137,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; ZVBB-NEXT: vid.v v12 +; ZVBB-NEXT: vand.vi v13, v12, 1 +; ZVBB-NEXT: vmsne.vi v0, v13, 0 ; ZVBB-NEXT: vsrl.vi v16, v12, 1 -; ZVBB-NEXT: vand.vi v12, v12, 1 -; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t ; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16 @@ -269,13 +269,13 @@ define @vector_interleave_nxv32i32_nxv16i32( @llvm.vector.interleave2.nxv32i32( %a, %b) @@ -288,44 +288,32 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv32f32_nxv16f32( @llvm.vector.interleave2.nxv32f32( %a, %b) @@ -612,44 +588,32 @@ define @vector_interleave_nxv16f64_nxv8f64( @vector_interleave_nxv16f64_nxv8f64( @splice_nxv32i1_offset_max( %a, @splice_nxv64i1_offset_max( %a, @vfabs_vv_nxv32f16( %va, @vfabs_vv_nxv16f64( %va, @llvm.vp.fabs.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index c69a7bc5cece47..ad7fb63fec2fcf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -579,9 +579,9 @@ define @vfadd_vv_nxv32f16( %va, 
@vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -675,12 +675,20 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -692,7 +700,9 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -702,11 +712,15 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB24_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 3ad17e85570a20..81d844d1950ab2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -525,9 +525,9 @@ define @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll index 3be3f835f3d11c..eafd605c6110eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll @@ -44,8 +44,8 @@ define iXLen @intrinsic_vfirst_mask_m_nxv1i1( %0, %0, %0, %0, %0, %0, %0, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8re64.v v16, (a3) ; CHECK-NEXT: sub a3, a4, a1 ; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24 ; CHECK-NEXT: bltu a4, a1, .LBB93_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB93_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -1259,6 +1258,7 @@ define @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; 
CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll index d4ba0f8c907338..c15b875e8f0c4e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll @@ -239,15 +239,15 @@ define @vfmadd_vv_nxv16f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index 755c665376128a..e928df85b5bb56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -255,9 +255,9 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll index 5114f0a8d1d65d..c835dc72268b32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll @@ -347,75 +347,64 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vf_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfmul_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded 
Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index 292f27794f378a..abda6750e5a8a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1125,22 +1125,20 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8re64.v v16, (a3) ; CHECK-NEXT: sub a3, a4, a1 ; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24 ; CHECK-NEXT: bltu a4, a1, .LBB93_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB93_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -1259,6 +1258,7 @@ define @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index 1db5fa1720a276..69ea7ce33cf6b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -250,8 +250,8 @@ define @vfneg_vv_nxv32f16( %va, @vfneg_vv_nxv16f64( %va, @llvm.vp.fneg.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll index 785f60ad1d39c8..b54590cd9d8440 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll @@ 
-412,85 +412,85 @@ define @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfpext_nxv32f16_nxv32f32( %a, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll index 15c4bf255e6dce..f3544589407d8c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -394,7 +394,15 @@ declare @llvm.vp.fptosi.nxv32i16.nxv32f32( @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -404,16 +412,22 @@ define @vfptosi_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.x.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -440,8 +454,8 @@ define @vfptosi_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index a2591e7dc35f03..9fd2d8edb2203d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -394,7 +394,15 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32( @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -404,16 +412,22 @@ define @vfptoui_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.xu.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -440,8 +454,8 @@ define @vfptoui_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index 0c3abe37af27a9..d2219cf9635967 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -113,16 +113,16 @@ define @vfptrunc_nxv16f32_nxv16f64( ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb @@ -169,11 +169,11 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: sub a6, a5, a1 ; CHECK-NEXT: sltu a7, a5, a6 ; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a6, a7, a6 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma ; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: and a0, a7, a6 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: bltu a5, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: @@ -181,8 +181,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t ; CHECK-NEXT: bltu a2, a4, .LBB8_4 ; 
CHECK-NEXT: # %bb.3: @@ -192,22 +192,22 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB8_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index d6caad15e40a2c..bd229e0220a4b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -250,8 +250,8 @@ define @vfsqrt_vv_nxv32f16( %va, @vfsqrt_vv_nxv16f64( %va, @llvm.vp.sqrt.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 2eae18d7cc4937..c833f8048fe32a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -525,9 +525,9 @@ define @vfsub_vv_nxv32f16( %va, @vfsub_vv_nxv32f16( %va, @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfsub_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; 
ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll index 78f3792dbaf062..f9d992a40299c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -671,9 +671,9 @@ define @vfmacc_vv_nxv16f32( %a, @vfnmacc_vv_nxv16f32( %a, @vfnmacc_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv4r.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfnmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll index 2797ca2eb31630..0ad7be47bcc8e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll @@ -601,9 +601,9 @@ define @vfnmsac_vv_nxv16f32( %a, @vfnmsac_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv4r.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfnmsub.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll index 5d0172430d15c8..77ef0a340270f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll @@ -449,12 +449,12 @@ define @vsitofp_nxv32i1_nxv32f16( %va) { ; ZVFHMIN-NEXT: vmv.v.i v12, 0 ; ZVFHMIN-NEXT: 
vmerge.vim v8, v12, -1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vmerge.vim v12, v12, -1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 @@ -478,12 +478,12 @@ define @vuitofp_nxv32i1_nxv32f16( %va) { ; ZVFHMIN-NEXT: vmv.v.i v12, 0 ; ZVFHMIN-NEXT: vmerge.vim v8, v12, 1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vmerge.vim v12, v12, 1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index a35fc874065a70..8a76467986620c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -423,8 +423,8 @@ define @vmax_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -986,8 +986,8 @@ define @vmax_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1046,8 +1046,8 @@ define @vmax_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1084,8 +1084,8 @@ define @vmax_vx_nxv32i32_evl_nx16( %va, i ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmax.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmax.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 1f620a44dbbc89..1c74887c1b20fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -425,8 +425,8 @@ define @vmaxu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -988,8 +988,8 @@ define @vmaxu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1048,8 +1048,8 @@ define @vmaxu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1086,8 +1086,8 @@ define @vmaxu_vx_nxv32i32_evl_nx16( %va, ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; 
RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index e7184921d87a08..2e5b67c93fce1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16( define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16( define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16( define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16( define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16( define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f32( define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; 
CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32( define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32( define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32( define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64( define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64( define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64( define @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: 
vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfeq_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfeq_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfeq_mask_vf_nxv16f16_f16( @intrinsic_vmfeq_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfeq_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfeq_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, 
mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index a6dad9eaa4f358..b5ca47707c8a82 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f16( define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfge.mask.nxv2f16( define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfge.mask.nxv4f16( define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfge.mask.nxv8f16( define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfge.mask.nxv16f16( define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f32( define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfge.mask.nxv2f32( define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfge.mask.nxv4f32( define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfge.mask.nxv8f32( define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f64( define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfge.mask.nxv2f64( define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfge.mask.nxv4f64( define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfge_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfge_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfge_mask_vf_nxv16f16_f16( @intrinsic_vmfge_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfge_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfge_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index f643a4036381c3..971249d38d1b26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16( define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16( define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16( define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: 
vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16( define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16( define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32( define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32( define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32( define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32( define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64( define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64( define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64( define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfgt_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfgt_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfgt_mask_vf_nxv16f16_f16( @intrinsic_vmfgt_mask_vf_nxv1f32_f32( ; 
CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfgt_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfgt_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index 6c52364c1fbd56..f19a181a365afc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f16( define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfle.mask.nxv2f16( define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, 
%3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfle.mask.nxv4f16( define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfle.mask.nxv8f16( define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfle.mask.nxv16f16( define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f32( define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfle.mask.nxv2f32( define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfle.mask.nxv4f32( define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, 
m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfle.mask.nxv8f32( define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f64( define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfle.mask.nxv2f64( define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfle.mask.nxv4f64( define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfle_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define 
@intrinsic_vmfle_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfle_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfle_mask_vf_nxv16f16_f16( @intrinsic_vmfle_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfle_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfle_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfle_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfle_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff 
--git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 37a9c6b081a1df..0a046422193342 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f16( define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmflt.mask.nxv2f16( define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmflt.mask.nxv4f16( define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmflt.mask.nxv8f16( define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmflt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmflt.mask.nxv16f16( define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f32( define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare @llvm.riscv.vmflt.mask.nxv2f32(
define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare @llvm.riscv.vmflt.mask.nxv4f32(
define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v10
-; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmflt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmflt.nxv4f32(
@@ -442,11 +450,12 @@ declare @llvm.riscv.vmflt.mask.nxv8f32(
define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v12
-; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmflt.nxv8f32(
@@ -493,9 +502,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f64(
define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare @llvm.riscv.vmflt.mask.nxv2f64(
define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v10
-; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmflt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmflt.nxv2f64(
@@ -595,11 +606,12 @@ declare @llvm.riscv.vmflt.mask.nxv4f64(
define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v12
-; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmflt.nxv4f64(
@@ -647,8 +659,8 @@ define @intrinsic_vmflt_mask_vf_nxv1f16_f16(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -694,8 +706,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f16_f16(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -741,8 +753,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f16_f16(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
@@ -788,8 +800,8 @@ define @intrinsic_vmflt_mask_vf_nxv8f16_f16(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -835,8 +847,8 @@ define @intrinsic_vmflt_mask_vf_nxv16f16_f16( @intrinsic_vmflt_mask_vf_nxv1f32_f32(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -929,8 +941,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f32_f32(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
@@ -976,8 +988,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f32_f32(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -1023,8 +1035,8 @@ define @intrinsic_vmflt_mask_vf_nxv8f32_f32(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: ret
@@ -1070,8 +1082,8 @@ define @intrinsic_vmflt_mask_vf_nxv1f64_f64(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
@@ -1117,8 +1129,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f64_f64(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -1164,8 +1176,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f64_f64(
; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
index 5defce42091e55..520099247e0f3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
@@ -34,9 +34,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f16(
define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare @llvm.riscv.vmfne.mask.nxv2f16(
define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare @llvm.riscv.vmfne.mask.nxv4f16(
define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare @llvm.riscv.vmfne.mask.nxv8f16(
define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v10
-; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmfne.nxv8f16(
@@ -238,11 +242,12 @@ declare @llvm.riscv.vmfne.mask.nxv16f16(
define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
;
CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f32( define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfne.mask.nxv2f32( define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfne.mask.nxv4f32( define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfne.mask.nxv8f32( define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f64( define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfne.mask.nxv2f64( define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v10 -; 
CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfne.mask.nxv4f64( define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfne_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfne_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfne_mask_vf_nxv16f16_f16( @intrinsic_vmfne_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfne_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfne_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 8fabf93356aebb..1c71242c3c7d79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -423,8 +423,8 @@ define @vmin_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -986,8 +986,8 @@ define @vmin_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1046,8 +1046,8 @@ define @vmin_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1084,8 +1084,8 @@ define @vmin_vx_nxv32i32_evl_nx16( %va, i ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmin.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmin.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 8ec85e545a0f8e..6d89a9777cf917 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -425,8 +425,8 @@ define @vminu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -988,8 +988,8 @@ define @vminu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ 
-1048,8 +1048,8 @@ define @vminu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1086,8 +1086,8 @@ define @vminu_vx_nxv32i32_evl_nx16( %va, ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vminu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vminu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll index 2d6e958fcd0baf..14a1f084c3985b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll @@ -32,8 +32,8 @@ define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( @llvm.riscv.vmseq.mask.nxv1i8( define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i8( define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 
+; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmseq.mask.nxv4i8( define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmseq.mask.nxv8i8( define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmseq.mask.nxv16i8( define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmseq.mask.nxv32i8( define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i16( define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i16( define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare 
@llvm.riscv.vmseq.mask.nxv4i16( define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmseq.mask.nxv8i16( define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmseq.mask.nxv16i16( define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i32( define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i32( define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmseq.mask.nxv4i32( define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmseq.mask.nxv8i32( define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i64( define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmseq.mask.nxv2i64( define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmseq.mask.nxv4i64( define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: 
intrinsic_vmseq_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmseq_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmseq_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmseq_mask_vx_nxv16i16_i16( @intrinsic_vmseq_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i32_i32( ; 
CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmseq.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmseq_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmseq_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmseq_mask_vi_nxv16i16_i16( @intrinsic_vmseq_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index c8f9b60a3f2da6..75fc407abbc2f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i8( define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i8( define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsge.mask.nxv4i8( define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsge.mask.nxv8i8( define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: 
vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsge.mask.nxv16i8( define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsge.mask.nxv32i8( define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i16( define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i16( define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsge.mask.nxv4i16( define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsge.mask.nxv8i16( define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: 
vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsge.mask.nxv16i16( define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i32( define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i32( define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsge.mask.nxv4i32( define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsge.mask.nxv8i32( define @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i64( define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; 
CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsge.mask.nxv2i64( define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsge.mask.nxv4i64( define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i64( @@ -954,8 +972,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1002,8 +1020,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1050,8 +1068,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1098,8 +1116,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1146,8 +1164,8 @@ define @intrinsic_vmsge_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1194,8 +1212,8 @@ define @intrinsic_vmsge_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, 
a1, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1242,8 +1260,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1290,8 +1308,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1338,8 +1356,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1386,8 +1404,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1434,8 +1452,8 @@ define @intrinsic_vmsge_mask_vx_nxv16i16_i16( @intrinsic_vmsge_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1530,8 +1548,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1578,8 +1596,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1626,8 +1644,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1701,8 +1719,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, 
ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret @@ -1776,8 +1794,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v11, v10 ; RV64-NEXT: ret @@ -1851,8 +1869,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v13, v12 ; RV64-NEXT: ret @@ -1886,8 +1904,8 @@ define @intrinsic_vmsge_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1921,8 +1939,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1956,8 +1974,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1991,8 +2009,8 @@ define @intrinsic_vmsge_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2026,8 +2044,8 @@ define @intrinsic_vmsge_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2061,8 +2079,8 @@ define @intrinsic_vmsge_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2096,8 +2114,8 @@ define @intrinsic_vmsge_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2131,8 +2149,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2166,8 +2184,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2201,8 +2219,8 @@ define @intrinsic_vmsge_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2236,8 +2254,8 @@ define @intrinsic_vmsge_mask_vi_nxv16i16_i16( @intrinsic_vmsge_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2306,8 +2324,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2341,8 +2359,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2376,8 +2394,8 @@ define @intrinsic_vmsge_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2411,8 +2429,8 @@ define @intrinsic_vmsge_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2446,8 +2464,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: 
vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2481,8 +2499,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index b6c6d9e90f6109..5568c1e9b1cfb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8( define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8( define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8( define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8( define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8( define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i8( @@ -289,11 +294,12 @@ declare 
@llvm.riscv.vmsgeu.mask.nxv32i8( define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16( define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16( define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16( define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16( define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16( define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32( define 
@intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32( define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32( define @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32( define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64( define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64( define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64( define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i64( @@ -954,8 +972,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1002,8 +1020,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1050,8 +1068,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1098,8 +1116,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1146,8 +1164,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv16i8_i8( @intrinsic_vmsgeu_mask_vx_nxv32i8_i8( @intrinsic_vmsgeu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1290,8 +1308,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1338,8 +1356,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1386,8 +1404,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1434,8 +1452,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv16i16_i16( @intrinsic_vmsgeu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1530,8 +1548,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1578,8 +1596,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1626,8 +1644,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1701,8 +1719,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsltu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret @@ -1776,8 +1794,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v11, v10 ; RV64-NEXT: ret @@ -1851,8 +1869,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v13, v12 ; RV64-NEXT: ret @@ -1886,8 +1904,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1921,8 +1939,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: 
intrinsic_vmsgeu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1956,8 +1974,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1991,8 +2009,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2026,8 +2044,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv16i8_i8( @intrinsic_vmsgeu_mask_vi_nxv32i8_i8( @intrinsic_vmsgeu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2178,8 +2196,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2213,8 +2231,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2248,8 +2266,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv16i16_i16( @intrinsic_vmsgeu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2318,8 +2336,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2353,8 +2371,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 10, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2388,8 +2406,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2423,8 +2441,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 14, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2458,8 +2476,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2493,8 +2511,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index dfd7096a65ebb9..f1fa6484d976b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8( define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8( define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8( define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8( define @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8( define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8( define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16( define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16( define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16( define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16( define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv 
v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16( define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32( define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32( define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32( define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32( define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64( define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; 
CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64( define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64( define @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsgt_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsgt_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsgt_mask_vx_nxv16i16_i16( @intrinsic_vmsgt_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64: 
; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsgt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsgt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsgt.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsgt_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsgt_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsgt_mask_vi_nxv16i16_i16( @intrinsic_vmsgt_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index 8826be03bbebb8..de7a0ad87be27c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8( define @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8( define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8( define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8( define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8( define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret 
entry: %mask = call @llvm.riscv.vmsgtu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8( define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16( define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16( define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16( define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16( define @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16( define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmsgtu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32( define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32( define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32( define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32( define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64( define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64( define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv2i64( @@ -901,11 
+918,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64( define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv16i8_i8( @intrinsic_vmsgtu_mask_vx_nxv32i8_i8( @intrinsic_vmsgtu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: 
intrinsic_vmsgtu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv16i16_i16( @intrinsic_vmsgtu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsgtu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i8_i8( 
% ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv16i8_i8( @intrinsic_vmsgtu_mask_vi_nxv32i8_i8( @intrinsic_vmsgtu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv16i16_i16( @intrinsic_vmsgtu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; 
CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll index 8ce9a3020b7a5c..05d402afc934cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll @@ -32,8 +32,8 @@ define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( @llvm.riscv.vmsle.mask.nxv1i8( define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsle.mask.nxv2i8( define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsle.mask.nxv4i8( define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsle.mask.nxv8i8( define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsle.mask.nxv16i8( define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsle.mask.nxv32i8( define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: 
vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i16( define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsle.mask.nxv2i16( define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsle.mask.nxv4i16( define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsle.mask.nxv8i16( define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsle.mask.nxv16i16( define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i32( define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ 
declare @llvm.riscv.vmsle.mask.nxv2i32( define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsle.mask.nxv4i32( define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsle.mask.nxv8i32( define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i64( define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsle.mask.nxv2i64( define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsle.mask.nxv4i64( define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i64( @@ -953,8 
+971,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsle_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsle_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsle_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define 
@intrinsic_vmsle_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsle_mask_vx_nxv16i16_i16( @intrinsic_vmsle_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsle_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsle.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsle.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsle.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define 
@intrinsic_vmsle_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsle_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsle_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsle_mask_vi_nxv16i16_i16( 
@intrinsic_vmsle_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index c58ac2d0718314..540577247484e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8( define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8( define 
@intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8( define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8( define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8( define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8( define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16( define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16( define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16( define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16( define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16( define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32( define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32( define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32( define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; 
CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32( define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64( define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64( define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64( define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsleu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define 
@intrinsic_vmsleu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsleu_mask_vx_nxv16i8_i8( @intrinsic_vmsleu_mask_vx_nxv32i8_i8( @intrinsic_vmsleu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsleu_mask_vx_nxv16i16_i16( @intrinsic_vmsleu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu 
; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsleu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsleu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsleu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsleu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsleu_mask_vi_nxv16i8_i8( @intrinsic_vmsleu_mask_vi_nxv32i8_i8( @intrinsic_vmsleu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsleu_mask_vi_nxv16i16_i16( @intrinsic_vmsleu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsleu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, 
ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 6c6e580b043d1a..554d25172d4fde 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i8( define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i8( define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmslt.mask.nxv4i8( define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmslt.mask.nxv8i8( define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmslt.mask.nxv16i8( define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i8( @@ -289,11 +294,12 @@ declare 
@llvm.riscv.vmslt.mask.nxv32i8( define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i16( define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i16( define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmslt.mask.nxv4i16( define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmslt.mask.nxv8i16( define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmslt.mask.nxv16i16( define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i32( define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i32( define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmslt.mask.nxv4i32( define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmslt.mask.nxv8i32( define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i64( define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmslt.mask.nxv2i64( define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmslt.mask.nxv4i64( define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmslt_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmslt_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmslt_mask_vx_nxv16i16_i16( @intrinsic_vmslt_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t ; RV64-NEXT: 
vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmslt_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmslt_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, zero, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: 
ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmslt_mask_vi_nxv16i16_i16( @intrinsic_vmslt_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index 76f3e449ab58f5..7a8efa6c80fb6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8( define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8( define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8( define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8( define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8( define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8( define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16( define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; 
CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16( define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16( define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16( define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16( define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32( define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32( define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; 
CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32( define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32( define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64( define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64( define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsltu.mask.nxv4i64( define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsltu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ 
-1000,8 +1018,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsltu_mask_vx_nxv16i8_i8( @intrinsic_vmsltu_mask_vx_nxv32i8_i8( @intrinsic_vmsltu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsltu_mask_vx_nxv16i16_i16( @intrinsic_vmsltu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli 
zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsltu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; 
CHECK-NEXT: vmsleu.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( @intrinsic_vmsltu_mask_vi_nxv32i8_i8( @intrinsic_vmsltu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, zero, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( @intrinsic_vmsltu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsltu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; 
CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 14, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 161c1bc4314fcb..bd6bd8a804bcc2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i8( define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsne.mask.nxv2i8( define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsne.mask.nxv4i8( define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsne.mask.nxv8i8( define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsne.mask.nxv16i8( define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsne.mask.nxv32i8( define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; 
CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; 
CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsne_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsne_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsne_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsne_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsne_mask_vx_nxv16i16_i16( @intrinsic_vmsne_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsne_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsne.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsne.vx v11, v8, a0, v0.t ; 
RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsne.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsne_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsne_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ 
-2148,8 +2166,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsne_mask_vi_nxv16i16_i16( @intrinsic_vmsne_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll index f6f90eddcd8c5b..0c60681ea8de0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll @@ -32,8 +32,8 @@ define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %src, %m, i32 %evl) { ; RV32-LABEL: bool_vec: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vfirst.m a1, v9, v0.t ; RV32-NEXT: bltz a1, .LBB0_2 ; RV32-NEXT: # %bb.1: @@ -20,8 +20,8 @@ define iXLen @bool_vec( %src, %m, i32 %evl) { ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vfirst.m a1, v9, v0.t ; RV64-NEXT: bltz a1, .LBB0_2 ; RV64-NEXT: # %bb.1: @@ -36,8 +36,8 @@ define iXLen @bool_vec_zero_poison( %src, %m, ; RV32-LABEL: bool_vec_zero_poison: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vfirst.m a0, v9, v0.t ; RV32-NEXT: ret ; @@ -46,8 +46,8 @@ define iXLen @bool_vec_zero_poison( %src, %m, ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vfirst.m a0, v9, v0.t ; RV64-NEXT: ret %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i1( %src, i1 1, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll index 8b1660283cb7d1..d0f2ce1ca80045 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -450,14 +450,14 @@ define @test_vp_reverse_nxv64i8_masked( %sr ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vrsub.vx v16, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t +; CHECK-NEXT: vslidedown.vx v8, v24, a1, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %mask, i32 %evl) ret %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index a30ebf2d33b502..7f81b99eb0338d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -8,8 +8,8 @@ define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -45,8 +45,8 @@ define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -82,8 +82,8 @@ define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -119,8 +119,8 @@ define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index ceb6a164e20df6..acf7d16bda9826 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -7,8 +7,8 @@ define @test_vp_reverse_nxv1i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; 
CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -44,8 +44,8 @@ define @test_vp_reverse_nxv2i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -81,8 +81,8 @@ define @test_vp_reverse_nxv4i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -118,8 +118,8 @@ define @test_vp_reverse_nxv8i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -155,8 +155,8 @@ define @test_vp_reverse_nxv16i1_masked( %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v12, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t @@ -193,8 +193,8 @@ define @test_vp_reverse_nxv32i1_masked( %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t @@ -242,8 +242,8 @@ define @test_vp_reverse_nxv64i1_masked( %sr ; CHECK-NEXT: vrgatherei16.vv v16, v28, v0 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v16, a1, v0.t ; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 @@ -263,14 +263,14 @@ define @test_vp_reverse_nxv64i1( %src, i32 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vrsub.vx v16, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a1 +; CHECK-NEXT: vslidedown.vx v8, v24, a1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index ce0ae2022885ac..9496cd82947d4b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -68,8 +68,8 @@ define <2 
x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t @@ -141,8 +141,8 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t @@ -214,8 +214,8 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t @@ -287,8 +287,8 @@ define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 668cff23429366..90276308252271 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -71,8 +71,8 @@ define @test_vp_splice_nxv1i1_masked( %va, @test_vp_splice_nxv2i1_masked( %va, @test_vp_splice_nxv4i1_masked( %va, @test_vp_splice_nxv8i1_masked( %va, @test_vp_splice_nxv16i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v14, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v12, a0, v0.t @@ -437,8 +437,8 @@ define @test_vp_splice_nxv32i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v16, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vslideup.vx v16, v12, a0, v0.t @@ -511,8 +511,8 @@ define @test_vp_splice_nxv64i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v24, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vslideup.vx v24, v16, a0, v0.t diff 
--git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c86fee6305931f..c0d7ecf74956b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -274,8 +274,8 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, %idxs @@ -2269,18 +2269,18 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @llvm.vp.gather.nxv16f64.nxv16p0( %ptrs, %m, i32 %evl) @@ -2319,20 +2319,20 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs @@ -2376,20 +2377,20 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a4 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB104_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB104_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2398,25 +2399,26 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: bltu a1, a2, .LBB104_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB104_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2434,20 +2436,20 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a4 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB105_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB105_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2459,20 +2461,20 @@ define 
@vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: bltu a1, a2, .LBB105_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB105_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index f07c16476c56a2..1b1e9153a2fd59 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -444,18 +444,18 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: srli a5, a2, 3 ; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: bltu a1, a2, .LBB37_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB37_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.vp.load.nxv16f64.p0(ptr %ptr, %m, i32 %evl) @@ -489,10 +489,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpmerge_vv_nxv128i8( %va, @vpmerge_vx_nxv128i8(i8 %a, %vb, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %a, i32 0 @@ -442,8 +442,8 @@ define @vpmerge_vi_nxv128i8( %vb, @llvm.vp.merge.nxv128i8( %m, splat (i8 2), %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 351fc500145eac..59662db42898fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2124,10 +2124,10 @@ define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: sub a2, a1, a0 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a0 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret @@ -2157,13 +2157,13 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: sub a0, a2, a1 ; RV64-NEXT: sltu a2, a2, a0 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a0, a2, a0 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: 
vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -2192,10 +2192,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2232,11 +2232,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -2244,6 +2243,7 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 @@ -2273,10 +2273,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2308,22 +2308,22 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB101_2: +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 @@ -2355,10 +2355,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, 
ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2380,10 +2380,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index c12fc0497742ad..ce0ee38bc70476 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -380,10 +380,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, ) define half @vreduce_fmin_nxv10f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI73_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v12, (a0), zero ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: lui a1, %hi(.LCPI73_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v12, (a1), zero ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 46560fc501c6f2..f21b42e9519b67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -192,8 +192,8 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %v, %v, %v, %v, %v, % ; CHECK-NEXT: vmv.s.x v25, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v25, v16, v25, v0.t ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll index 94ed7e568a01e6..39666bb6119a0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -24,8 +24,8 @@ define zeroext i1 @vpreduce_or_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, < ; CHECK-NEXT: sltu a4, a1, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a3, v8, v0.t ; CHECK-NEXT: snez a3, a3 ; CHECK-NEXT: bltu a1, a2, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v11, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -391,8 +391,8 @@ define zeroext i1 @vpreduce_add_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, < ; CHECK-LABEL: vpreduce_smin_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: 
vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -696,8 +696,8 @@ define zeroext i1 @vpreduce_smin_nxv32i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_smin_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -712,8 +712,8 @@ define zeroext i1 @vpreduce_smin_nxv64i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_smin_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -728,8 +728,8 @@ define zeroext i1 @vpreduce_umax_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, < ; CHECK-LABEL: vpreduce_umax_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -808,8 +808,8 @@ define zeroext i1 @vpreduce_umax_nxv32i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_umax_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -824,8 +824,8 @@ define zeroext i1 @vpreduce_umax_nxv64i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_umax_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index 462d49991ae4f6..e95e9fabe93422 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -12,11 +12,11 @@ define internal void @foo( %v15, %0, %vs12.i.i.i, %1, %v37) { ; NOSUBREG-LABEL: foo: ; NOSUBREG: # %bb.0: # %loopIR.preheader.i.i -; NOSUBREG-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; NOSUBREG-NEXT: vmv.v.i v14, 0 -; NOSUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; NOSUBREG-NEXT: vmv.v.i v9, 0 -; NOSUBREG-NEXT: vmv.v.i v8, 0 +; NOSUBREG-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; NOSUBREG-NEXT: vmv.v.i v14, 0 +; NOSUBREG-NEXT: vmv1r.v v8, v9 ; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; NOSUBREG-NEXT: vrgatherei16.vv v8, v9, v14 ; NOSUBREG-NEXT: .LBB0_1: # %loopIR3.i.i @@ -32,11 +32,11 @@ define internal void @foo( %v15, %0, @vsadd_vi_nxv128i8( %va, @llvm.vp.sadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1366,8 +1366,8 @@ define @vsadd_vi_nxv32i32( %va, @llvm.vp.sadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index 745b93b257085a..454a4ebab04a28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -586,8 +586,8 
@@ define @vsaddu_vi_nxv128i8( %va, @llvm.vp.uadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1365,8 +1365,8 @@ define @vsaddu_vi_nxv32i32( %va, @llvm.vp.uadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 4457c1002acc7b..53b8e4a78b756f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -211,12 +211,12 @@ define @vfmerge_fv_nxv32f16( %va, half ; CHECK-ZVFHMIN: # %bb.0: ; CHECK-ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; CHECK-ZVFHMIN-NEXT: vmv.v.v v20, v16 +; CHECK-ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; CHECK-ZVFHMIN-NEXT: vmv.v.v v28, v24 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 0a5e50160fbc93..ee0617c9314801 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -354,11 +354,17 @@ define @select_nxv32i32( %a, @select_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_nxv16f64( %a, @select_nxv16f64( %a, @test8(i64 %avl, i8 zeroext %cond, %a, %x, pt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vv v12, v12, v12 -; CHECK-NEXT: vs4r.v v12, (a0) ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: vs4r.v v12, (a0) +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %index = add %x, %x store %index, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll index 834e7dd85aea06..9b5a1a54ad5dfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll @@ -167,8 +167,8 @@ define @vsext_nxv32i8_nxv32i32( %a, @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v24, v0 +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -396,16 +404,22 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 +; ZVFH-NEXT: addi a3, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.x.w v28, v16, v0.t +; ZVFH-NEXT: vfncvt.f.x.w v20, v24, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB25_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB25_2: +; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vsetvli zero, a0, 
e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v24 -; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v24 +; ZVFH-NEXT: vfncvt.f.x.w v16, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32: @@ -428,8 +442,8 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB25_2: -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 @@ -460,8 +474,8 @@ define @vsitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.sitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index b56a0f40176cf3..613b58b0f1b88a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -593,22 +593,22 @@ define @vssub_vi_nxv128i8( %va, @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v @@ -1393,25 +1393,25 @@ define @vssub_vi_nxv32i32( %va, @llvm.vp.ssub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 8275c3081c7c16..8c729d7d9bfb6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -591,22 +591,22 @@ define @vssubu_vi_nxv128i8( %va, @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v @@ -1391,25 +1391,25 @@ define @vssubu_vi_nxv32i32( %va, @llvm.vp.usub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 4857810e7a1703..27755c166cc52d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -174,8 +174,8 @@ define @vtrunc_nxv15i16_nxv15i64( %a, @vtrunc_nxv32i7_nxv32i32( %a, @vtrunc_nxv32i8_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v24, v0 +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -396,16 +404,22 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 +; ZVFH-NEXT: addi a3, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: 
vfncvt.f.xu.w v28, v16, v0.t +; ZVFH-NEXT: vfncvt.f.xu.w v20, v24, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB25_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB25_2: +; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v24 -; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v24 +; ZVFH-NEXT: vfncvt.f.xu.w v16, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32: @@ -428,8 +442,8 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB25_2: -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 @@ -460,8 +474,8 @@ define @vuitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.uitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll index c5f34eee311890..a869b433a4952e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll @@ -85,9 +85,9 @@ define @test3( %0, %1, @test3( %0, %1, @vzext_nxv32i8_nxv32i32( %a,