From d9cd80171662501bafbfde2723dbcdb9a96bb3ea Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 17 Apr 2024 15:54:11 +0800
Subject: [PATCH] [RISCV] Defer forming x0,x0 vsetvlis till after insertion

Stacked on #96200

Currently we try to detect when the VL doesn't change between two
vsetvlis in emitVSETVLIs, and insert a VL-preserving vsetvli x0,x0 then
and there. Doing it in situ has some drawbacks:

- We lose information about what the VL is, which can prevent
  doLocalPostpass from coalescing some vsetvlis further down the line
- We have to explicitly handle x0,x0 form vsetvlis in coalesceVSETVLIs,
  whereas we don't in the top-down passes
- This prevents us from sharing the VSETVLIInfo compatibility logic
  between the two, which is why we have canMutatePriorConfig

This patch changes emitVSETVLIs to just emit regular vsetvlis, and adds
a separate pass after coalesceVSETVLIs to convert vsetvlis to x0,x0
when possible.

By removing the edge cases needed to handle x0,x0s, we can unify how we
check vsetvli compatibility between coalesceVSETVLIs and emitVSETVLIs,
and remove the duplicated logic in areCompatibleVTYPEs and
canMutatePriorConfig.

Note that when converting to x0,x0, we reuse the block data computed
from the dataflow analysis despite it taking place after
coalesceVSETVLIs. This turns out to be fine since coalesceVSETVLIs
never changes the exit state (only the local state within the block),
and so the entry states stay the same too.
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 339 ++++++-------
 .../RISCV/rvv/fixed-vectors-expandload-fp.ll | 54 +--
 .../RISCV/rvv/fixed-vectors-expandload-int.ll | 36 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 2 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll | 459 ++++++------
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 36 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.ll | 2 +-
 .../CodeGen/RISCV/rvv/vsetvli-intrinsics.ll | 1 -
 8 files changed, 352 insertions(+), 577 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e8541dc2b5c86a..a72c886ef8e663 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -74,15 +74,6 @@ static bool isVectorConfigInstr(const MachineInstr &MI) {
          MI.getOpcode() == RISCV::PseudoVSETIVLI;
 }
 
-/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
-/// VL and only sets VTYPE.
-static bool isVLPreservingConfig(const MachineInstr &MI) {
-  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
-    return false;
-  assert(RISCV::X0 == MI.getOperand(1).getReg());
-  return RISCV::X0 == MI.getOperand(0).getReg();
-}
-
 static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
   default:
@@ -332,60 +323,6 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
   return Fractional || LMul == 1;
 }
 
-/// Return true if moving from CurVType to NewVType is
-/// indistinguishable from the perspective of an instruction (or set
-/// of instructions) which use only the Used subfields and properties.
-static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, - const DemandedFields &Used) { - switch (Used.SEW) { - case DemandedFields::SEWNone: - break; - case DemandedFields::SEWEqual: - if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqual: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || - RISCVVType::getSEW(NewVType) >= 64) - return false; - break; - } - - switch (Used.LMUL) { - case DemandedFields::LMULNone: - break; - case DemandedFields::LMULEqual: - if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) - return false; - break; - case DemandedFields::LMULLessThanOrEqualToM1: - if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType))) - return false; - break; - } - - if (Used.SEWLMULRatio) { - auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), - RISCVVType::getVLMUL(CurVType)); - auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), - RISCVVType::getVLMUL(NewVType)); - if (Ratio1 != Ratio2) - return false; - } - - if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != - RISCVVType::isTailAgnostic(NewVType)) - return false; - if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != - RISCVVType::isMaskAgnostic(NewVType)) - return false; - return true; -} - /// Return the fields and properties demanded by the provided instruction. DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { // This function works in coalesceVSETVLI too. We can still use the value of a @@ -725,9 +662,56 @@ class VSETVLIInfo { return getSEWLMULRatio() == Other.getSEWLMULRatio(); } + /// Return true if moving from Require to this is indistinguishable from the + /// perspective of an instruction (or set of instructions) which use only the + /// Used subfields and properties. bool hasCompatibleVTYPE(const DemandedFields &Used, const VSETVLIInfo &Require) const { - return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used); + // If either only has information about the SEW/LMUL ratio, they can only be + // compatible if we only use the SEW/LMUL ratio. 
+ if ((SEWLMULRatioOnly || Require.SEWLMULRatioOnly) && + (Used.SEW != DemandedFields::SEWNone || Used.MaskPolicy || + Used.TailPolicy || Used.LMUL)) + return false; + + switch (Used.SEW) { + case DemandedFields::SEWNone: + break; + case DemandedFields::SEWEqual: + if (Require.SEW != SEW) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqual: + if (SEW < Require.SEW) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: + if (SEW < Require.SEW || SEW >= 64) + return false; + break; + } + + switch (Used.LMUL) { + case DemandedFields::LMULNone: + break; + case DemandedFields::LMULEqual: + if (VLMul != Require.VLMul) + return false; + break; + case DemandedFields::LMULLessThanOrEqualToM1: + if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(VLMul))) + return false; + break; + } + + if (Used.SEWLMULRatio && Require.getSEWLMULRatio() != getSEWLMULRatio()) + return false; + + if (Used.TailPolicy && Require.TailAgnostic != TailAgnostic) + return false; + if (Used.MaskPolicy && Require.MaskAgnostic != MaskAgnostic) + return false; + return true; } // Determine whether the vector instructions requirements represented by @@ -741,10 +725,6 @@ class VSETVLIInfo { if (isUnknown() || Require.isUnknown()) return false; - // If only our VLMAX ratio is valid, then this isn't compatible. - if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly) - return false; - if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) return false; @@ -919,6 +899,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doPRE(MachineBasicBlock &MBB); + void convertToX0X0(MachineBasicBlock &MBB); void insertReadVL(MachineBasicBlock &MBB); bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI, @@ -1086,40 +1067,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { ++NumInsertedVSETVL; - if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { - // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same - // VLMAX. - if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - if (LIS) - LIS->InsertMachineInstrInMaps(*MI); - return; - } - - // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If - // it has the same VLMAX we want and the last VL/VTYPE we observed is the - // same, we can use the X0, X0 form. 
- if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) { - if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS); - DefMI && isVectorConfigInstr(*DefMI)) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - if (LIS) - LIS->InsertMachineInstrInMaps(*MI); - return; - } - } - } - } if (Info.hasAVLImm()) { auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) @@ -1594,43 +1541,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldExit); } -// Return true if we can mutate PrevMI to match MI without changing any the -// fields which would be observed. -bool RISCVInsertVSETVLI::canMutatePriorConfig( - const MachineInstr &PrevMI, const MachineInstr &MI, - const DemandedFields &Used) const { - // If the VL values aren't equal, return false if either a) the former is - // demanded, or b) we can't rewrite the former to be the later for - // implementation reasons. - if (!isVLPreservingConfig(MI)) { - if (Used.VLAny) - return false; - - if (Used.VLZeroness) { - if (isVLPreservingConfig(PrevMI)) - return false; - if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), - LIS)) - return false; - } - - auto &AVL = MI.getOperand(1); - auto &PrevAVL = PrevMI.getOperand(1); - - // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. - // For now just check that PrevMI uses the same virtual register. - if (AVL.isReg() && AVL.getReg() != RISCV::X0 && - (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() || - PrevAVL.getReg() != AVL.getReg())) - return false; - } - - assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); - auto PriorVType = PrevMI.getOperand(2).getImm(); - auto VType = MI.getOperand(2).getImm(); - return areCompatibleVTYPEs(PriorVType, VType, Used); -} - void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { MachineInstr *NextMI = nullptr; // We can have arbitrary code in successors, so VL and VTYPE @@ -1660,57 +1570,79 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { continue; } - if (canMutatePriorConfig(MI, *NextMI, Used)) { - if (!isVLPreservingConfig(*NextMI)) { - Register DefReg = NextMI->getOperand(0).getReg(); - - MI.getOperand(0).setReg(DefReg); - MI.getOperand(0).setIsDead(false); - - // The def of DefReg moved to MI, so extend the LiveInterval up to - // it. - if (DefReg.isVirtual() && LIS) { - LiveInterval &DefLI = LIS->getInterval(DefReg); - SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot(); - VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); - LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI); - DefLI.addSegment(S); - DefVNI->def = MISlot; - // Mark DefLI as spillable if it was previously unspillable - DefLI.setWeight(0); - - // DefReg may have had no uses, in which case we need to shrink - // the LiveInterval up to MI. - LIS->shrinkToUses(&DefLI); - } + const VSETVLIInfo MIInfo = getInfoForVSETVLI(MI); + const VSETVLIInfo NextMIInfo = getInfoForVSETVLI(*NextMI); + + // If the new AVL is a register make sure it dominates PrevMI. For now + // just check that it's the same AVL used by PrevMI. 
+ bool NewAVLDominates = true; + if (NextMIInfo.hasAVLReg() && NextMIInfo.getAVLReg().isVirtual()) + NewAVLDominates = MIInfo.hasSameAVL(NextMIInfo); + + // We are coalescing two vsetvlis into one, so at least one of the defs + // will need to be dead. + const MachineOperand *DefOp = nullptr; + if (MI.getOperand(0).isDead()) + DefOp = &NextMI->getOperand(0); + else if (NextMI->getOperand(0).isDead()) + DefOp = &MI.getOperand(0); + + if (NextMIInfo.isCompatible(Used, MIInfo, LIS) && NewAVLDominates && + DefOp) { + MI.getOperand(0).setReg(DefOp->getReg()); + MI.getOperand(0).setIsDead(DefOp->isDead()); + + // If NextMI defined a reg, it will have moved to MI so extend + // the LiveInterval up to it. + if (NextMI->getOperand(0).getReg().isVirtual() && LIS) { + LiveInterval &DefLI = + LIS->getInterval(NextMI->getOperand(0).getReg()); + SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot(); + VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); + LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI); + DefLI.addSegment(S); + DefVNI->def = MISlot; + // Mark DefLI as spillable if it was previously unspillable + DefLI.setWeight(0); + + // DefReg may have had no uses, in which case we need to shrink + // the LiveInterval up to MI. + LIS->shrinkToUses(&DefLI); + } - Register OldVLReg; - if (MI.getOperand(1).isReg()) - OldVLReg = MI.getOperand(1).getReg(); - if (NextMI->getOperand(1).isImm()) - MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm()); - else - MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false); + Register OldVLReg; + if (MI.getOperand(1).isReg()) + OldVLReg = MI.getOperand(1).getReg(); + if (NextMIInfo.hasAVLImm()) + MI.getOperand(1).ChangeToImmediate(NextMIInfo.getAVLImm()); + else if (NextMIInfo.hasAVLVLMAX()) { + assert(MI.getOperand(0).getReg() != RISCV::X0); + MI.getOperand(1).ChangeToRegister(RISCV::X0, false); + } else + MI.getOperand(1).ChangeToRegister(NextMIInfo.getAVLReg(), false); + + // Clear NextMI's AVL early so we're not counting it as a use. + if (NextMI->getOperand(1).isReg()) { + Register Reg = NextMI->getOperand(1).getReg(); + NextMI->getOperand(1).setReg(RISCV::NoRegister); + if (LIS && Reg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(Reg)); + } - // Clear NextMI's AVL early so we're not counting it as a use. - if (NextMI->getOperand(1).isReg()) - NextMI->getOperand(1).setReg(RISCV::NoRegister); + if (OldVLReg && OldVLReg.isVirtual()) { + // NextMI no longer uses OldVLReg so shrink its LiveInterval. + if (LIS) + LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); - if (OldVLReg && OldVLReg.isVirtual()) { - // NextMI no longer uses OldVLReg so shrink its LiveInterval. 
+ MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); + if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && + MRI->use_nodbg_empty(OldVLReg)) { + VLOpDef->eraseFromParent(); if (LIS) - LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); - - MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); - if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && - MRI->use_nodbg_empty(OldVLReg)) { - VLOpDef->eraseFromParent(); - if (LIS) - LIS->removeInterval(OldVLReg); - } + LIS->removeInterval(OldVLReg); } - MI.setDesc(NextMI->getDesc()); } + MI.setDesc(NextMI->getDesc()); MI.getOperand(2).setImm(NextMI->getOperand(2).getImm()); ToDelete.push_back(NextMI); // fallthrough @@ -1753,6 +1685,38 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { } } +void RISCVInsertVSETVLI::convertToX0X0(MachineBasicBlock &MBB) { + VSETVLIInfo Info = BlockInfo[MBB.getNumber()].Pred; + for (MachineInstr &MI : MBB) { + if (isVectorConfigInstr(MI)) { + VSETVLIInfo MIInfo = getInfoForVSETVLI(MI); + + // If VL doesn't change going from Info to MIInfo, then we can use x0,x0 + DemandedFields Demanded; + Demanded.demandVL(); + + if (Info.isCompatible(Demanded, MIInfo, LIS) && + MI.getOperand(0).isDead()) { + MI.setDesc(TII->get(RISCV::PseudoVSETVLIX0)); + MI.getOperand(0).ChangeToRegister(RISCV::X0, /*isDef*/ true); + MI.getOperand(0).setIsDead(true); + Register OldReg; + if (MI.getOperand(1).isReg()) + OldReg = MI.getOperand(1).getReg(); + MI.getOperand(1).ChangeToRegister(RISCV::X0, /*isDef*/ false); + MI.getOperand(1).setIsKill(true); + if (LIS && OldReg && OldReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(OldReg)); + MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, + /*isImp*/ true)); + Info = MIInfo; // transferAfter can't handle x0,x0 + continue; + } + } + transferAfter(Info, MI); + } +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. ST = &MF.getSubtarget(); @@ -1821,6 +1785,13 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) coalesceVSETVLIs(MBB); + // Find vset[i]vlis that don't change VL and replace them with vsetvli x0,x0. + // Defer this to the end rather than during vsetvli insertion so we don't lose + // any information about the AVL which may help us coalesce them in + // doLocalPostpass. + for (MachineBasicBlock &MBB : MF) + convertToX0X0(MBB); + // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output // of VLEFF/VLSEGFF. 
for (MachineBasicBlock &MBB : MF) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll index 8b31166e313deb..fe43dcd3fd2cd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll @@ -51,9 +51,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV32-NEXT: beqz a1, .LBB1_2 ; RV32-NEXT: .LBB1_4: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -77,9 +76,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV64-NEXT: beqz a1, .LBB1_2 ; RV64-NEXT: .LBB1_4: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0) @@ -114,9 +112,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV32-NEXT: beqz a2, .LBB2_2 ; RV32-NEXT: .LBB2_6: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -162,9 +159,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV64-NEXT: beqz a2, .LBB2_2 ; RV64-NEXT: .LBB2_6: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -227,9 +223,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: .LBB3_10: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -319,9 +314,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV64-NEXT: beqz a2, .LBB3_2 ; RV64-NEXT: .LBB3_10: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -425,9 +419,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV32-NEXT: beqz a1, .LBB5_2 ; RV32-NEXT: .LBB5_4: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -451,9 +444,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; 
RV64-NEXT: beqz a1, .LBB5_2 ; RV64-NEXT: .LBB5_4: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0) @@ -488,9 +480,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV32-NEXT: beqz a2, .LBB6_2 ; RV32-NEXT: .LBB6_6: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -536,9 +527,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV64-NEXT: beqz a2, .LBB6_2 ; RV64-NEXT: .LBB6_6: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -601,9 +591,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV32-NEXT: beqz a2, .LBB7_2 ; RV32-NEXT: .LBB7_10: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -693,9 +682,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV64-NEXT: beqz a2, .LBB7_2 ; RV64-NEXT: .LBB7_10: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -799,9 +787,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV32-NEXT: beqz a1, .LBB9_2 ; RV32-NEXT: .LBB9_4: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -825,9 +812,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV64-NEXT: beqz a1, .LBB9_2 ; RV64-NEXT: .LBB9_4: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0) @@ -862,9 +848,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV32-NEXT: beqz a2, .LBB10_2 ; RV32-NEXT: .LBB10_6: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 8 ; 
RV32-NEXT: andi a2, a1, 4 @@ -910,9 +895,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV64-NEXT: beqz a2, .LBB10_2 ; RV64-NEXT: .LBB10_6: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -975,9 +959,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV32-NEXT: beqz a2, .LBB11_2 ; RV32-NEXT: .LBB11_10: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v12, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vfmv.s.f v12, fa5 ; RV32-NEXT: vslideup.vi v8, v12, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -1067,9 +1050,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV64-NEXT: beqz a2, .LBB11_2 ; RV64-NEXT: .LBB11_10: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v12, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vfmv.s.f v12, fa5 ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll index 5bf8b07efc1daf..4ba3394fb60303 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll @@ -40,9 +40,8 @@ define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: .LBB1_4: # %cond.load1 ; CHECK-NEXT: lbu a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0) @@ -77,9 +76,8 @@ define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB2_2 ; CHECK-NEXT: .LBB2_6: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -142,9 +140,8 @@ define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB3_2 ; CHECK-NEXT: .LBB3_10: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -237,9 +234,8 @@ define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB5_2 ; CHECK-NEXT: .LBB5_4: # %cond.load1 ; CHECK-NEXT: lh a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 
%base, <2 x i1> %mask, <2 x i16> %src0) @@ -274,9 +270,8 @@ define <4 x i16> @expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB6_2 ; CHECK-NEXT: .LBB6_6: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -339,9 +334,8 @@ define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB7_2 ; CHECK-NEXT: .LBB7_10: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -434,9 +428,8 @@ define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB9_2 ; CHECK-NEXT: .LBB9_4: # %cond.load1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0) @@ -471,9 +464,8 @@ define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB10_2 ; CHECK-NEXT: .LBB10_6: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -536,9 +528,8 @@ define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB11_2 ; CHECK-NEXT: .LBB11_10: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -680,9 +671,8 @@ define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) { ; RV64-NEXT: beqz a1, .LBB13_2 ; RV64-NEXT: .LBB13_4: # %cond.load1 ; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0) @@ -775,9 +765,8 @@ define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) { ; RV64-NEXT: beqz a2, .LBB14_2 ; RV64-NEXT: .LBB14_6: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.s.x v10, a2 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vmv.s.x v10, a2 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -954,9 +943,8 @@ define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) { ; RV64-NEXT: beqz a2, .LBB15_2 ; RV64-NEXT: .LBB15_10: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, 
ma -; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 379a51f4eee301..96e0a31dd427f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,7 +39,7 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vmul.vx v14, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vadd.vi v8, v14, -14 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index a4f9eeb59cd5bf..93a97d667ec2fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -89,9 +89,8 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) @@ -138,9 +137,8 @@ define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB2_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -192,9 +190,8 @@ define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB3_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -246,9 +243,8 @@ define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB4_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -300,9 +296,8 @@ define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; 
RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB5_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -361,9 +356,8 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -423,9 +417,8 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -484,9 +477,8 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 @@ -617,9 +609,8 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 @@ -889,9 +880,8 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr ; RV64ZVE32F-NEXT: beqz a2, .LBB14_2 ; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) @@ -938,9 +928,8 @@ define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB15_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB15_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -992,9 +981,8 @@ define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x 
i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB16_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB16_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -1053,9 +1041,8 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB17_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1117,9 +1104,8 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1180,9 +1166,8 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3 @@ -1313,9 +1298,8 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3 @@ -1414,9 +1398,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1441,9 +1424,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1461,9 +1443,8 @@ define <8 x i16> 
@mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_6 @@ -1474,9 +1455,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_7 @@ -1486,9 +1466,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 @@ -1498,9 +1477,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB23_11 @@ -1511,9 +1489,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1566,9 +1543,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1593,9 +1569,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1613,9 +1588,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; 
RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6 @@ -1626,9 +1600,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7 @@ -1638,9 +1611,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 @@ -1650,9 +1622,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB24_11 @@ -1663,9 +1634,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1719,9 +1689,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1747,9 +1716,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1768,9 +1736,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; 
RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6 @@ -1782,9 +1749,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7 @@ -1795,9 +1761,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 @@ -1808,9 +1773,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB25_11 @@ -1822,9 +1786,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -2057,9 +2020,8 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) @@ -2115,9 +2077,8 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2175,9 +2136,8 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 ; RV64ZVE32F-NEXT: # %bb.3: # 
%cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2238,9 +2198,8 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3 @@ -2370,9 +2329,8 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3 @@ -2470,9 +2428,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2497,9 +2454,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2517,9 +2473,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 @@ -2530,9 +2485,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 @@ -2542,9 +2496,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; 
RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 @@ -2554,9 +2507,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 @@ -2567,9 +2519,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2621,9 +2572,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2648,9 +2598,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2668,9 +2617,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 @@ -2681,9 +2629,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 @@ -2693,9 +2640,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, 
e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 @@ -2705,9 +2651,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB36_11 @@ -2718,9 +2663,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2776,9 +2720,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2804,9 +2747,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2825,9 +2767,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 @@ -2839,9 +2780,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 @@ -2852,9 +2792,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: 
vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 @@ -2865,9 +2804,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB37_11 @@ -2879,9 +2817,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2935,9 +2872,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -2962,9 +2898,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2982,9 +2917,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 @@ -2995,9 +2929,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 @@ -3007,9 +2940,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 @@ -3019,9 +2951,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB38_11 @@ -3032,9 +2963,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3087,9 +3017,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -3114,9 +3043,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -3134,9 +3062,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 @@ -3147,9 +3074,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 @@ -3159,9 +3085,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi 
a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 @@ -3171,9 +3096,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB39_11 @@ -3184,9 +3108,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3243,9 +3166,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -3271,9 +3193,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 @@ -3292,9 +3213,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 @@ -3306,9 +3226,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 @@ -3319,9 +3238,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 @@ -3332,9 +3250,8 
@@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 @@ -3346,9 +3263,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -6951,9 +6867,8 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1 ; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) @@ -7004,9 +6919,8 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 @@ -7137,9 +7051,8 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 @@ -7238,9 +7151,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7265,9 +7177,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 ; 
RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -7285,9 +7196,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6 @@ -7298,9 +7208,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7 @@ -7310,9 +7219,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8 @@ -7322,9 +7230,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB64_11 @@ -7335,9 +7242,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7390,9 +7296,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7417,9 +7322,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 ; RV64ZVE32F-NEXT: andi 
a2, a1, 64 @@ -7437,9 +7341,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6 @@ -7450,9 +7353,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7 @@ -7462,9 +7364,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8 @@ -7474,9 +7375,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB65_11 @@ -7487,9 +7387,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7543,9 +7442,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7571,9 +7469,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -7592,9 
+7489,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6 @@ -7606,9 +7502,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7 @@ -7619,9 +7514,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8 @@ -7632,9 +7526,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB66_11 @@ -7646,9 +7539,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7881,9 +7773,8 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 ; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1 ; RV64ZVE32F-NEXT: flw fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru) @@ -7934,9 +7825,8 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas ; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; 
RV64ZVE32F-NEXT: beqz a2, .LBB70_3 @@ -8066,9 +7956,8 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_3 @@ -8166,9 +8055,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8193,9 +8081,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8213,9 +8100,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 @@ -8226,9 +8112,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 @@ -8238,9 +8123,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 @@ -8250,9 +8134,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; 
RV64ZVE32F-NEXT: beqz a1, .LBB74_11 @@ -8263,9 +8146,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8317,9 +8199,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8344,9 +8225,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8364,9 +8244,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_6 @@ -8377,9 +8256,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_7 @@ -8389,9 +8267,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8 @@ -8401,9 +8278,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, 
.LBB75_11 @@ -8414,9 +8290,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8472,9 +8347,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8500,9 +8374,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8521,9 +8394,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_6 @@ -8535,9 +8407,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_7 @@ -8548,9 +8419,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8 @@ -8561,9 +8431,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB76_11 @@ -8575,9 
+8444,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8631,9 +8499,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8658,9 +8525,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8678,9 +8544,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_6 @@ -8691,9 +8556,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_7 @@ -8703,9 +8567,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8 @@ -8715,9 +8578,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB77_11 @@ -8728,9 +8590,8 @@ define <8 x 
float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8783,9 +8644,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8810,9 +8670,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8830,9 +8689,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 @@ -8843,9 +8701,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 @@ -8855,9 +8712,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 @@ -8867,9 +8723,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB78_11 @@ -8880,9 +8735,8 @@ define <8 x float> 
@mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8939,9 +8793,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8967,9 +8820,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 @@ -8988,9 +8840,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_6 @@ -9002,9 +8853,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_7 @@ -9015,9 +8865,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8 @@ -9028,9 +8877,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB79_11 @@ -9042,9 +8890,8 @@ define <8 x float> 
@mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 1dd74a7c9dd1b1..57c0069e1a3385 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1942,9 +1942,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6 @@ -2079,9 +2078,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6 @@ -2222,9 +2220,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6 @@ -2365,9 +2362,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6 @@ -2503,9 +2499,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6 @@ -2647,9 +2642,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: 
vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6 @@ -7062,9 +7056,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6 @@ -7199,9 +7192,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6 @@ -7342,9 +7334,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6 @@ -7485,9 +7476,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6 @@ -7623,9 +7613,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6 @@ -7767,9 +7756,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 5a6364967eba25..79167cb4ab64e4 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -450,7 +450,7 @@ define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocaptur ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll index 20dfc7755d4c45..687c3ebeb56613 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll @@ -123,7 +123,6 @@ define @redundant_vsetvli(iXLen %avl, ptr %ptr) nounwind { define @repeated_vsetvli(iXLen %avl, ptr %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, ma ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret