Skip to content

Commit

Permalink
[RISCV] Support post-regalloc vsetvli pass
Browse files Browse the repository at this point in the history
  • Loading branch information
BeMg committed May 14, 2024
1 parent e799314 commit e84993b
Show file tree
Hide file tree
Showing 249 changed files with 10,385 additions and 9,574 deletions.
19 changes: 15 additions & 4 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ static cl::opt<bool> EnableMISchedLoadClustering(
cl::desc("Enable load clustering in the machine scheduler"),
cl::init(false));

static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
"riscv-vsetvli-after-rvv-regalloc", cl::Hidden,
cl::desc("vsetvl insertion after rvv regalloc"), cl::init(true));

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -389,6 +393,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand All @@ -399,6 +405,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand Down Expand Up @@ -545,10 +553,13 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertWriteVXRMPass());
// Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
// register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass());
else
insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass());
if (!EnableVSETVLIAfterRVVRegAlloc) {
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass());
else
insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass());
}

}

void RISCVPassConfig::addFastRegAlloc() {
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,14 @@
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@
; CHECK-NEXT: RISC-V Merge Base Offset
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
Expand All @@ -129,7 +128,6 @@
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: Machine Block Frequency Analysis
Expand All @@ -143,6 +141,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: RISC-V Dead register definitions
; CHECK-NEXT: Virtual Register Map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,36 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
Expand All @@ -71,12 +71,12 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, 1048572
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
; CHECK-NEXT: lui a0, %hi(var_47)
; CHECK-NEXT: addi a0, a0, %lo(var_47)
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: vid.v v8
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vid.v v16
; RV32-NEXT: li a2, -1
; RV32-NEXT: vmadd.vx v8, a2, v16
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmadd.vx v16, a2, v8
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vmsne.vi v0, v16, 0
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: vmerge.vim v16, v16, -1, v0
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: sltu a3, a0, a2
Expand All @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV64: # %bb.0:
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v24, a0
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmv.v.x v16, a0
; RV64-NEXT: vid.v v24
; RV64-NEXT: li a1, -1
; RV64-NEXT: vmadd.vx v16, a1, v24
; RV64-NEXT: vmadd.vx v24, a1, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
Expand Down
Loading

0 comments on commit e84993b

Please sign in to comment.