diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e8541dc2b5c86a..a72c886ef8e663 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -74,15 +74,6 @@ static bool isVectorConfigInstr(const MachineInstr &MI) { MI.getOpcode() == RISCV::PseudoVSETIVLI; } -/// Return true if this is 'vsetvli x0, x0, vtype' which preserves -/// VL and only sets VTYPE. -static bool isVLPreservingConfig(const MachineInstr &MI) { - if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) - return false; - assert(RISCV::X0 == MI.getOperand(1).getReg()); - return RISCV::X0 == MI.getOperand(0).getReg(); -} - static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) { switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: @@ -332,60 +323,6 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) { return Fractional || LMul == 1; } -/// Return true if moving from CurVType to NewVType is -/// indistinguishable from the perspective of an instruction (or set -/// of instructions) which use only the Used subfields and properties. -static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, - const DemandedFields &Used) { - switch (Used.SEW) { - case DemandedFields::SEWNone: - break; - case DemandedFields::SEWEqual: - if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqual: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || - RISCVVType::getSEW(NewVType) >= 64) - return false; - break; - } - - switch (Used.LMUL) { - case DemandedFields::LMULNone: - break; - case DemandedFields::LMULEqual: - if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) - return false; - break; - case DemandedFields::LMULLessThanOrEqualToM1: - if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType))) - return false; - break; - } - - if (Used.SEWLMULRatio) { - auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), - RISCVVType::getVLMUL(CurVType)); - auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), - RISCVVType::getVLMUL(NewVType)); - if (Ratio1 != Ratio2) - return false; - } - - if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != - RISCVVType::isTailAgnostic(NewVType)) - return false; - if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != - RISCVVType::isMaskAgnostic(NewVType)) - return false; - return true; -} - /// Return the fields and properties demanded by the provided instruction. DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { // This function works in coalesceVSETVLI too. We can still use the value of a @@ -725,9 +662,56 @@ class VSETVLIInfo { return getSEWLMULRatio() == Other.getSEWLMULRatio(); } + /// Return true if moving from Require to this is indistinguishable from the + /// perspective of an instruction (or set of instructions) which use only the + /// Used subfields and properties. bool hasCompatibleVTYPE(const DemandedFields &Used, const VSETVLIInfo &Require) const { - return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used); + // If either only has information about the SEW/LMUL ratio, they can only be + // compatible if we only use the SEW/LMUL ratio. + if ((SEWLMULRatioOnly || Require.SEWLMULRatioOnly) && + (Used.SEW != DemandedFields::SEWNone || Used.MaskPolicy || + Used.TailPolicy || Used.LMUL)) + return false; + + switch (Used.SEW) { + case DemandedFields::SEWNone: + break; + case DemandedFields::SEWEqual: + if (Require.SEW != SEW) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqual: + if (SEW < Require.SEW) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: + if (SEW < Require.SEW || SEW >= 64) + return false; + break; + } + + switch (Used.LMUL) { + case DemandedFields::LMULNone: + break; + case DemandedFields::LMULEqual: + if (VLMul != Require.VLMul) + return false; + break; + case DemandedFields::LMULLessThanOrEqualToM1: + if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(VLMul))) + return false; + break; + } + + if (Used.SEWLMULRatio && Require.getSEWLMULRatio() != getSEWLMULRatio()) + return false; + + if (Used.TailPolicy && Require.TailAgnostic != TailAgnostic) + return false; + if (Used.MaskPolicy && Require.MaskAgnostic != MaskAgnostic) + return false; + return true; } // Determine whether the vector instructions requirements represented by @@ -741,10 +725,6 @@ class VSETVLIInfo { if (isUnknown() || Require.isUnknown()) return false; - // If only our VLMAX ratio is valid, then this isn't compatible. - if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly) - return false; - if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) return false; @@ -919,6 +899,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doPRE(MachineBasicBlock &MBB); + void convertToX0X0(MachineBasicBlock &MBB); void insertReadVL(MachineBasicBlock &MBB); bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI, @@ -1086,40 +1067,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { ++NumInsertedVSETVL; - if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { - // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same - // VLMAX. - if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - if (LIS) - LIS->InsertMachineInstrInMaps(*MI); - return; - } - - // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If - // it has the same VLMAX we want and the last VL/VTYPE we observed is the - // same, we can use the X0, X0 form. - if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) { - if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS); - DefMI && isVectorConfigInstr(*DefMI)) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - if (LIS) - LIS->InsertMachineInstrInMaps(*MI); - return; - } - } - } - } if (Info.hasAVLImm()) { auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) @@ -1594,43 +1541,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldExit); } -// Return true if we can mutate PrevMI to match MI without changing any the -// fields which would be observed. -bool RISCVInsertVSETVLI::canMutatePriorConfig( - const MachineInstr &PrevMI, const MachineInstr &MI, - const DemandedFields &Used) const { - // If the VL values aren't equal, return false if either a) the former is - // demanded, or b) we can't rewrite the former to be the later for - // implementation reasons. - if (!isVLPreservingConfig(MI)) { - if (Used.VLAny) - return false; - - if (Used.VLZeroness) { - if (isVLPreservingConfig(PrevMI)) - return false; - if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), - LIS)) - return false; - } - - auto &AVL = MI.getOperand(1); - auto &PrevAVL = PrevMI.getOperand(1); - - // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. - // For now just check that PrevMI uses the same virtual register. - if (AVL.isReg() && AVL.getReg() != RISCV::X0 && - (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() || - PrevAVL.getReg() != AVL.getReg())) - return false; - } - - assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); - auto PriorVType = PrevMI.getOperand(2).getImm(); - auto VType = MI.getOperand(2).getImm(); - return areCompatibleVTYPEs(PriorVType, VType, Used); -} - void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { MachineInstr *NextMI = nullptr; // We can have arbitrary code in successors, so VL and VTYPE @@ -1660,57 +1570,79 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { continue; } - if (canMutatePriorConfig(MI, *NextMI, Used)) { - if (!isVLPreservingConfig(*NextMI)) { - Register DefReg = NextMI->getOperand(0).getReg(); - - MI.getOperand(0).setReg(DefReg); - MI.getOperand(0).setIsDead(false); - - // The def of DefReg moved to MI, so extend the LiveInterval up to - // it. - if (DefReg.isVirtual() && LIS) { - LiveInterval &DefLI = LIS->getInterval(DefReg); - SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot(); - VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); - LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI); - DefLI.addSegment(S); - DefVNI->def = MISlot; - // Mark DefLI as spillable if it was previously unspillable - DefLI.setWeight(0); - - // DefReg may have had no uses, in which case we need to shrink - // the LiveInterval up to MI. - LIS->shrinkToUses(&DefLI); - } + const VSETVLIInfo MIInfo = getInfoForVSETVLI(MI); + const VSETVLIInfo NextMIInfo = getInfoForVSETVLI(*NextMI); + + // If the new AVL is a register make sure it dominates PrevMI. For now + // just check that it's the same AVL used by PrevMI. + bool NewAVLDominates = true; + if (NextMIInfo.hasAVLReg() && NextMIInfo.getAVLReg().isVirtual()) + NewAVLDominates = MIInfo.hasSameAVL(NextMIInfo); + + // We are coalescing two vsetvlis into one, so at least one of the defs + // will need to be dead. + const MachineOperand *DefOp = nullptr; + if (MI.getOperand(0).isDead()) + DefOp = &NextMI->getOperand(0); + else if (NextMI->getOperand(0).isDead()) + DefOp = &MI.getOperand(0); + + if (NextMIInfo.isCompatible(Used, MIInfo, LIS) && NewAVLDominates && + DefOp) { + MI.getOperand(0).setReg(DefOp->getReg()); + MI.getOperand(0).setIsDead(DefOp->isDead()); + + // If NextMI defined a reg, it will have moved to MI so extend + // the LiveInterval up to it. + if (NextMI->getOperand(0).getReg().isVirtual() && LIS) { + LiveInterval &DefLI = + LIS->getInterval(NextMI->getOperand(0).getReg()); + SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot(); + VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); + LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI); + DefLI.addSegment(S); + DefVNI->def = MISlot; + // Mark DefLI as spillable if it was previously unspillable + DefLI.setWeight(0); + + // DefReg may have had no uses, in which case we need to shrink + // the LiveInterval up to MI. + LIS->shrinkToUses(&DefLI); + } - Register OldVLReg; - if (MI.getOperand(1).isReg()) - OldVLReg = MI.getOperand(1).getReg(); - if (NextMI->getOperand(1).isImm()) - MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm()); - else - MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false); + Register OldVLReg; + if (MI.getOperand(1).isReg()) + OldVLReg = MI.getOperand(1).getReg(); + if (NextMIInfo.hasAVLImm()) + MI.getOperand(1).ChangeToImmediate(NextMIInfo.getAVLImm()); + else if (NextMIInfo.hasAVLVLMAX()) { + assert(MI.getOperand(0).getReg() != RISCV::X0); + MI.getOperand(1).ChangeToRegister(RISCV::X0, false); + } else + MI.getOperand(1).ChangeToRegister(NextMIInfo.getAVLReg(), false); + + // Clear NextMI's AVL early so we're not counting it as a use. + if (NextMI->getOperand(1).isReg()) { + Register Reg = NextMI->getOperand(1).getReg(); + NextMI->getOperand(1).setReg(RISCV::NoRegister); + if (LIS && Reg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(Reg)); + } - // Clear NextMI's AVL early so we're not counting it as a use. - if (NextMI->getOperand(1).isReg()) - NextMI->getOperand(1).setReg(RISCV::NoRegister); + if (OldVLReg && OldVLReg.isVirtual()) { + // NextMI no longer uses OldVLReg so shrink its LiveInterval. + if (LIS) + LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); - if (OldVLReg && OldVLReg.isVirtual()) { - // NextMI no longer uses OldVLReg so shrink its LiveInterval. + MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); + if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && + MRI->use_nodbg_empty(OldVLReg)) { + VLOpDef->eraseFromParent(); if (LIS) - LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); - - MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); - if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && - MRI->use_nodbg_empty(OldVLReg)) { - VLOpDef->eraseFromParent(); - if (LIS) - LIS->removeInterval(OldVLReg); - } + LIS->removeInterval(OldVLReg); } - MI.setDesc(NextMI->getDesc()); } + MI.setDesc(NextMI->getDesc()); MI.getOperand(2).setImm(NextMI->getOperand(2).getImm()); ToDelete.push_back(NextMI); // fallthrough @@ -1753,6 +1685,38 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { } } +void RISCVInsertVSETVLI::convertToX0X0(MachineBasicBlock &MBB) { + VSETVLIInfo Info = BlockInfo[MBB.getNumber()].Pred; + for (MachineInstr &MI : MBB) { + if (isVectorConfigInstr(MI)) { + VSETVLIInfo MIInfo = getInfoForVSETVLI(MI); + + // If VL doesn't change going from Info to MIInfo, then we can use x0,x0 + DemandedFields Demanded; + Demanded.demandVL(); + + if (Info.isCompatible(Demanded, MIInfo, LIS) && + MI.getOperand(0).isDead()) { + MI.setDesc(TII->get(RISCV::PseudoVSETVLIX0)); + MI.getOperand(0).ChangeToRegister(RISCV::X0, /*isDef*/ true); + MI.getOperand(0).setIsDead(true); + Register OldReg; + if (MI.getOperand(1).isReg()) + OldReg = MI.getOperand(1).getReg(); + MI.getOperand(1).ChangeToRegister(RISCV::X0, /*isDef*/ false); + MI.getOperand(1).setIsKill(true); + if (LIS && OldReg && OldReg.isVirtual()) + LIS->shrinkToUses(&LIS->getInterval(OldReg)); + MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, + /*isImp*/ true)); + Info = MIInfo; // transferAfter can't handle x0,x0 + continue; + } + } + transferAfter(Info, MI); + } +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. ST = &MF.getSubtarget(); @@ -1821,6 +1785,13 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) coalesceVSETVLIs(MBB); + // Find vset[i]vlis that don't change VL and replace them with vsetvli x0,x0. + // Defer this to the end rather than during vsetvli insertion so we don't lose + // any information about the AVL which may help us coalesce them in + // doLocalPostpass. + for (MachineBasicBlock &MBB : MF) + convertToX0X0(MBB); + // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output // of VLEFF/VLSEGFF. for (MachineBasicBlock &MBB : MF) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll index 8b31166e313deb..fe43dcd3fd2cd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll @@ -51,9 +51,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV32-NEXT: beqz a1, .LBB1_2 ; RV32-NEXT: .LBB1_4: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -77,9 +76,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV64-NEXT: beqz a1, .LBB1_2 ; RV64-NEXT: .LBB1_4: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0) @@ -114,9 +112,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV32-NEXT: beqz a2, .LBB2_2 ; RV32-NEXT: .LBB2_6: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -162,9 +159,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV64-NEXT: beqz a2, .LBB2_2 ; RV64-NEXT: .LBB2_6: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -227,9 +223,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: .LBB3_10: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -319,9 +314,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV64-NEXT: beqz a2, .LBB3_2 ; RV64-NEXT: .LBB3_10: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -425,9 +419,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV32-NEXT: beqz a1, .LBB5_2 ; RV32-NEXT: .LBB5_4: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -451,9 +444,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV64-NEXT: beqz a1, .LBB5_2 ; RV64-NEXT: .LBB5_4: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0) @@ -488,9 +480,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV32-NEXT: beqz a2, .LBB6_2 ; RV32-NEXT: .LBB6_6: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -536,9 +527,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV64-NEXT: beqz a2, .LBB6_2 ; RV64-NEXT: .LBB6_6: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -601,9 +591,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV32-NEXT: beqz a2, .LBB7_2 ; RV32-NEXT: .LBB7_10: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -693,9 +682,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV64-NEXT: beqz a2, .LBB7_2 ; RV64-NEXT: .LBB7_10: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -799,9 +787,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV32-NEXT: beqz a1, .LBB9_2 ; RV32-NEXT: .LBB9_4: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vfmv.s.f v9, fa5 ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -825,9 +812,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV64-NEXT: beqz a1, .LBB9_2 ; RV64-NEXT: .LBB9_4: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vfmv.s.f v9, fa5 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0) @@ -862,9 +848,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV32-NEXT: beqz a2, .LBB10_2 ; RV32-NEXT: .LBB10_6: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vfmv.s.f v10, fa5 ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -910,9 +895,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV64-NEXT: beqz a2, .LBB10_2 ; RV64-NEXT: .LBB10_6: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vfmv.s.f v10, fa5 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -975,9 +959,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV32-NEXT: beqz a2, .LBB11_2 ; RV32-NEXT: .LBB11_10: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vfmv.s.f v12, fa5 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vfmv.s.f v12, fa5 ; RV32-NEXT: vslideup.vi v8, v12, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -1067,9 +1050,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV64-NEXT: beqz a2, .LBB11_2 ; RV64-NEXT: .LBB11_10: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vfmv.s.f v12, fa5 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vfmv.s.f v12, fa5 ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll index 5bf8b07efc1daf..4ba3394fb60303 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll @@ -40,9 +40,8 @@ define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: .LBB1_4: # %cond.load1 ; CHECK-NEXT: lbu a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0) @@ -77,9 +76,8 @@ define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB2_2 ; CHECK-NEXT: .LBB2_6: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -142,9 +140,8 @@ define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB3_2 ; CHECK-NEXT: .LBB3_10: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -237,9 +234,8 @@ define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB5_2 ; CHECK-NEXT: .LBB5_4: # %cond.load1 ; CHECK-NEXT: lh a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 %base, <2 x i1> %mask, <2 x i16> %src0) @@ -274,9 +270,8 @@ define <4 x i16> @expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB6_2 ; CHECK-NEXT: .LBB6_6: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -339,9 +334,8 @@ define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB7_2 ; CHECK-NEXT: .LBB7_10: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -434,9 +428,8 @@ define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) { ; CHECK-NEXT: beqz a1, .LBB9_2 ; CHECK-NEXT: .LBB9_4: # %cond.load1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0) @@ -471,9 +464,8 @@ define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB10_2 ; CHECK-NEXT: .LBB10_6: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -536,9 +528,8 @@ define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) { ; CHECK-NEXT: beqz a2, .LBB11_2 ; CHECK-NEXT: .LBB11_10: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -680,9 +671,8 @@ define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) { ; RV64-NEXT: beqz a1, .LBB13_2 ; RV64-NEXT: .LBB13_4: # %cond.load1 ; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0) @@ -775,9 +765,8 @@ define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) { ; RV64-NEXT: beqz a2, .LBB14_2 ; RV64-NEXT: .LBB14_6: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.s.x v10, a2 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vmv.s.x v10, a2 ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -954,9 +943,8 @@ define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) { ; RV64-NEXT: beqz a2, .LBB15_2 ; RV64-NEXT: .LBB15_10: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 379a51f4eee301..96e0a31dd427f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,7 +39,7 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vmul.vx v14, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vadd.vi v8, v14, -14 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index a4f9eeb59cd5bf..93a97d667ec2fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -89,9 +89,8 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) @@ -138,9 +137,8 @@ define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB2_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -192,9 +190,8 @@ define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB3_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -246,9 +243,8 @@ define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB4_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -300,9 +296,8 @@ define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB5_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -361,9 +356,8 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -423,9 +417,8 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -484,9 +477,8 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 @@ -617,9 +609,8 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 @@ -889,9 +880,8 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr ; RV64ZVE32F-NEXT: beqz a2, .LBB14_2 ; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) @@ -938,9 +928,8 @@ define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB15_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB15_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -992,9 +981,8 @@ define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB16_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB16_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -1053,9 +1041,8 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB17_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1117,9 +1104,8 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1180,9 +1166,8 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3 @@ -1313,9 +1298,8 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3 @@ -1414,9 +1398,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1441,9 +1424,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1461,9 +1443,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_6 @@ -1474,9 +1455,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_7 @@ -1486,9 +1466,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 @@ -1498,9 +1477,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB23_11 @@ -1511,9 +1489,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1566,9 +1543,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1593,9 +1569,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1613,9 +1588,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6 @@ -1626,9 +1600,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7 @@ -1638,9 +1611,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 @@ -1650,9 +1622,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB24_11 @@ -1663,9 +1634,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1719,9 +1689,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1747,9 +1716,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -1768,9 +1736,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6 @@ -1782,9 +1749,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7 @@ -1795,9 +1761,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 @@ -1808,9 +1773,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB25_11 @@ -1822,9 +1786,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -2057,9 +2020,8 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) @@ -2115,9 +2077,8 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2175,9 +2136,8 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2238,9 +2198,8 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3 @@ -2370,9 +2329,8 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3 @@ -2470,9 +2428,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2497,9 +2454,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2517,9 +2473,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 @@ -2530,9 +2485,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 @@ -2542,9 +2496,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 @@ -2554,9 +2507,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 @@ -2567,9 +2519,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2621,9 +2572,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2648,9 +2598,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2668,9 +2617,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 @@ -2681,9 +2629,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 @@ -2693,9 +2640,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 @@ -2705,9 +2651,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB36_11 @@ -2718,9 +2663,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2776,9 +2720,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -2804,9 +2747,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2825,9 +2767,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 @@ -2839,9 +2780,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 @@ -2852,9 +2792,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 @@ -2865,9 +2804,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB37_11 @@ -2879,9 +2817,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2935,9 +2872,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -2962,9 +2898,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -2982,9 +2917,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 @@ -2995,9 +2929,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 @@ -3007,9 +2940,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 @@ -3019,9 +2951,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB38_11 @@ -3032,9 +2963,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3087,9 +3017,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -3114,9 +3043,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -3134,9 +3062,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 @@ -3147,9 +3074,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 @@ -3159,9 +3085,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 @@ -3171,9 +3096,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB39_11 @@ -3184,9 +3108,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3243,9 +3166,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -3271,9 +3193,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 @@ -3292,9 +3213,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 @@ -3306,9 +3226,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 @@ -3319,9 +3238,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 @@ -3332,9 +3250,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 @@ -3346,9 +3263,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -6951,9 +6867,8 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1 ; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) @@ -7004,9 +6919,8 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 @@ -7137,9 +7051,8 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 @@ -7238,9 +7151,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7265,9 +7177,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -7285,9 +7196,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6 @@ -7298,9 +7208,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7 @@ -7310,9 +7219,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8 @@ -7322,9 +7230,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB64_11 @@ -7335,9 +7242,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7390,9 +7296,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7417,9 +7322,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -7437,9 +7341,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6 @@ -7450,9 +7353,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7 @@ -7462,9 +7364,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8 @@ -7474,9 +7375,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB65_11 @@ -7487,9 +7387,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7543,9 +7442,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7571,9 +7469,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -7592,9 +7489,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6 @@ -7606,9 +7502,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7 @@ -7619,9 +7514,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8 @@ -7632,9 +7526,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB66_11 @@ -7646,9 +7539,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7881,9 +7773,8 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 ; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1 ; RV64ZVE32F-NEXT: flw fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru) @@ -7934,9 +7825,8 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas ; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_3 @@ -8066,9 +7956,8 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_3 @@ -8166,9 +8055,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8193,9 +8081,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8213,9 +8100,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 @@ -8226,9 +8112,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 @@ -8238,9 +8123,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 @@ -8250,9 +8134,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11 @@ -8263,9 +8146,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8317,9 +8199,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8344,9 +8225,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8364,9 +8244,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_6 @@ -8377,9 +8256,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_7 @@ -8389,9 +8267,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8 @@ -8401,9 +8278,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB75_11 @@ -8414,9 +8290,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8472,9 +8347,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -8500,9 +8374,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8521,9 +8394,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_6 @@ -8535,9 +8407,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_7 @@ -8548,9 +8419,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8 @@ -8561,9 +8431,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB76_11 @@ -8575,9 +8444,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8631,9 +8499,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8658,9 +8525,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8678,9 +8544,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_6 @@ -8691,9 +8556,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_7 @@ -8703,9 +8567,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8 @@ -8715,9 +8578,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB77_11 @@ -8728,9 +8590,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8783,9 +8644,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8810,9 +8670,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 @@ -8830,9 +8689,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 @@ -8843,9 +8701,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 @@ -8855,9 +8712,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 @@ -8867,9 +8723,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB78_11 @@ -8880,9 +8735,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8939,9 +8793,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -8967,9 +8820,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 @@ -8988,9 +8840,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_6 @@ -9002,9 +8853,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_7 @@ -9015,9 +8865,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8 @@ -9028,9 +8877,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB79_11 @@ -9042,9 +8890,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 1dd74a7c9dd1b1..57c0069e1a3385 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1942,9 +1942,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6 @@ -2079,9 +2078,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6 @@ -2222,9 +2220,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6 @@ -2365,9 +2362,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6 @@ -2503,9 +2499,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6 @@ -2647,9 +2642,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6 @@ -7062,9 +7056,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6 @@ -7199,9 +7192,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6 @@ -7342,9 +7334,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6 @@ -7485,9 +7476,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6 @@ -7623,9 +7613,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6 @@ -7767,9 +7756,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 5a6364967eba25..79167cb4ab64e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -450,7 +450,7 @@ define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocaptur ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll index 20dfc7755d4c45..687c3ebeb56613 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll @@ -123,7 +123,6 @@ define @redundant_vsetvli(iXLen %avl, ptr %ptr) nounwind { define @repeated_vsetvli(iXLen %avl, ptr %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, ma ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret