[RISCV] Relax RISCVInsertVSETVLI output VL peeking to cover registers #96200

Merged (2 commits) on Jun 23, 2024
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (20 additions, 21 deletions)
@@ -962,6 +962,17 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
   }
   NewInfo.setVTYPE(MI.getOperand(2).getImm());
 
+  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
+  // AVL operand with the AVL of the defining vsetvli.
+  if (NewInfo.hasAVLReg()) {
+    if (const MachineInstr *DefMI = NewInfo.getAVLDefMI(LIS);
+        DefMI && isVectorConfigInstr(*DefMI)) {
+      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
+      if (DefInstrInfo.hasSameVLMAX(NewInfo))
+        NewInfo.setAVL(DefInstrInfo);
+    }
+  }
+
   return NewInfo;
 }
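The effect on generated code, as a hand-written sketch (register names chosen for illustration; the test diffs below show the real output): when an AVL register is the VL output of an earlier vsetvli with the same VLMAX, the pass now peeks through it back to the original AVL:

    # Before: the second vsetvli's AVL is a2, the VL output of the first
    vsetvli a2, a0, e64, m1, ta, ma
    ...
    vsetvli zero, a2, e64, m1, ta, ma

    # After: both have the same VLMAX, so the AVL peeks through a2 back to a0;
    # if a2 then has no other uses, its definition can drop to zero as well
    vsetvli zero, a0, e64, m1, ta, ma
    ...
    vsetvli zero, a0, e64, m1, ta, ma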

@@ -1050,15 +1061,12 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
 
   // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
-  // AVL operand with the AVL of the defining vsetvli. We avoid general
-  // register AVLs to avoid extending live ranges without being sure we can
-  // kill the original source reg entirely.
+  // AVL operand with the AVL of the defining vsetvli.
   if (InstrInfo.hasAVLReg()) {
     if (const MachineInstr *DefMI = InstrInfo.getAVLDefMI(LIS);
         DefMI && isVectorConfigInstr(*DefMI)) {
       VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
-      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
-          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
+      if (DefInstrInfo.hasSameVLMAX(InstrInfo))
         InstrInfo.setAVL(DefInstrInfo);
     }
   }

@@ -1146,9 +1154,13 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
     LIS->InsertMachineInstrInMaps(*MI);
     // Normally the AVL's live range will already extend past the inserted
     // vsetvli because the pseudos below will already use the AVL. But this
-    // isn't always the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
-    LIS->getInterval(AVLReg).extendInBlock(
-        LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
+    // isn't always the case, e.g. PseudoVMV_X_S doesn't have an AVL operand or
+    // we've taken the AVL from the VL output of another vsetvli.
+    LiveInterval &LI = LIS->getInterval(AVLReg);
+    // Need to get non-const VNInfo
+    VNInfo *VNI = LI.getValNumInfo(Info.getAVLVNInfo()->id);
+    LI.addSegment(LiveInterval::Segment(
+        VNI->def, LIS->getInstructionIndex(*MI).getRegSlot(), VNI));
   }
 }
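Why the liveness update changes shape (my reading, hedged): as I understand extendInBlock, it only extends a value that is still live just before the insertion point in that block, which no longer holds once the AVL can be a general register forwarded from another vsetvli, since that register's live range may have ended at its old last use. A sketch of the situation the explicit addSegment handles:

    vsetvli a2, a0, e64, m1, ta, ma    # last original use of a0: its live range ended here
    ...
    # inserted: vsetvli zero, a0, e64, m1, ta, ma
    # -> a0 must be made live again from its defining value down to this point;
    #    extendInBlock would find nothing live to extend, so the code above
    #    adds the segment [def, inserted vsetvli) explicitly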

@@ -1163,19 +1175,6 @@ bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
   if (CurInfo.isCompatible(Used, Require, LIS))
     return false;
 
-  // We didn't find a compatible value. If our AVL is a virtual register,
-  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
-  // and the last VL/VTYPE we observed is the same, we don't need a
-  // VSETVLI here.
-  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
-    if (const MachineInstr *DefMI = Require.getAVLDefMI(LIS);
-        DefMI && isVectorConfigInstr(*DefMI)) {
-      VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
-        return false;
-    }
-  }
-
   return true;
 }
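Why this block can simply go (my reading, not stated in the patch): computeInfoForInstr now performs the same peek-through when it builds Require, so Require's AVL has already been rewritten to the defining vsetvli's AVL by the time needVSETVLI runs, and the generic isCompatible check above covers the case this block handled. For example:

    vsetvli a2, a0, e64, m1, ta, ma    # CurInfo: AVL = a0
    ...
    vadd.vv v8, v8, v9                 # pseudo's AVL operand is a2, but Require
                                       # peeks through a2 to a0 -> same AVL and
                                       # VLMAX as CurInfo, so isCompatible succeeds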

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (25 additions, 23 deletions)

@@ -234,41 +234,41 @@ if.end6: ; preds = %if.else5, %if.then4
 define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi a3, a1, 1
-; CHECK-NEXT:    vsetvli a2, a0, e64, m1, ta, ma
-; CHECK-NEXT:    bnez a3, .LBB5_3
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    bnez a2, .LBB5_3
 ; CHECK-NEXT:  # %bb.1: # %if.else
 ; CHECK-NEXT:    vfsub.vv v8, v8, v9
 ; CHECK-NEXT:    andi a1, a1, 2
 ; CHECK-NEXT:    beqz a1, .LBB5_4
 ; CHECK-NEXT:  .LBB5_2: # %if.then4
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI5_0)
+; CHECK-NEXT:    vlse64.v v9, (a1), zero
+; CHECK-NEXT:    lui a1, %hi(.LCPI5_1)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI5_1)
+; CHECK-NEXT:    vlse64.v v10, (a1), zero
 ; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    lui a0, %hi(scratch)
-; CHECK-NEXT:    addi a0, a0, %lo(scratch)
-; CHECK-NEXT:    vse64.v v9, (a0)
+; CHECK-NEXT:    lui a1, %hi(scratch)
+; CHECK-NEXT:    addi a1, a1, %lo(scratch)
+; CHECK-NEXT:    vse64.v v9, (a1)
 ; CHECK-NEXT:    j .LBB5_5
 ; CHECK-NEXT:  .LBB5_3: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
 ; CHECK-NEXT:    andi a1, a1, 2
 ; CHECK-NEXT:    bnez a1, .LBB5_2
 ; CHECK-NEXT:  .LBB5_4: # %if.else5
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    lui a0, 260096
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    lui a0, 262144
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    lui a1, 260096
+; CHECK-NEXT:    vmv.v.x v9, a1
+; CHECK-NEXT:    lui a1, 262144
+; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    lui a0, %hi(scratch)
-; CHECK-NEXT:    addi a0, a0, %lo(scratch)
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    lui a1, %hi(scratch)
+; CHECK-NEXT:    addi a1, a1, %lo(scratch)
+; CHECK-NEXT:    vse32.v v9, (a1)
 ; CHECK-NEXT:  .LBB5_5: # %if.end10
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmul.vv v8, v8, v8
 ; CHECK-NEXT:    ret
 entry:
@@ -328,7 +328,8 @@ define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    sub sp, sp, a2
-; CHECK-NEXT:    vsetvli s0, a0, e64, m1, ta, ma
+; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
Inline review comments on the mv s0, a0 line:

Contributor (author): The mv here comes from the call below, which clobbers a0, but I don't think this is a regression anyway since we're removing the true dependency.

Contributor: Agree. And I think the hardware may implement mv elimination.

Collaborator: In-order cores can't really implement mv elimination, since it requires register renaming.
 ; CHECK-NEXT:    beqz a1, .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
@@ -387,7 +388,8 @@ define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    sub sp, sp, a2
-; CHECK-NEXT:    vsetvli s0, a0, e64, m1, ta, ma
+; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    beqz a1, .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v9, v8, v9
@@ -722,7 +724,7 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB14_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    add a2, a2, a3
 ; CHECK-NEXT:    add a1, a1, a4
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll (0 additions, 1 deletion)

@@ -258,7 +258,6 @@ entry:
 define <vscale x 1 x double> @test14(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, ma
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -20,7 +20,6 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
 ; CHECK-NEXT:    mv a3, a0
 ; CHECK-NEXT:  .LBB0_3: # %do.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
 ; CHECK-NEXT:    vse8.v v8, (a3)
 ; CHECK-NEXT:    add a3, a3, a4