Skip to content

Commit

Permalink
[SimplifyCFG] Allow hoisting if all instructions in successors match.
Browse files Browse the repository at this point in the history
Generalize EqTermsOnly to allow hoisting if all all instructions in the
successor blocks match. This allows hoisting all instructions and removing
the blocks we are hoisting from, so does not add any new instructions.
  • Loading branch information
fhahn committed Nov 25, 2024
1 parent f81f47e commit fe2dd4b
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 116 deletions.
200 changes: 109 additions & 91 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);

bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
Instruction *TI, Instruction *I1,
SmallVectorImpl<Instruction *> &OtherSuccTIs);
Expand Down Expand Up @@ -1772,13 +1772,84 @@ static bool isSafeCheapLoadStore(const Instruction *I,
getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// For example (assume all blocks have size n):
// LockstepReverseIterator I([B1, B2, B3]);
// *I-- = [B1[n], B2[n], B3[n]];
// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
// ...
class LockstepReverseIterator {
ArrayRef<BasicBlock *> Blocks;
SmallVector<Instruction *, 4> Insts;
bool Fail;

public:
LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
reset();
}

void reset() {
Fail = false;
Insts.clear();
for (auto *BB : Blocks) {
Instruction *Inst = BB->getTerminator();
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
if (!Inst) {
// Block wasn't big enough.
Fail = true;
return;
}
Insts.push_back(Inst);
}
}

bool isValid() const { return !Fail; }

void operator--() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
// Already at beginning of block.
if (!Inst) {
Fail = true;
return;
}
}
}

void operator++() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getNextNode();
// Already at end of block.
if (!Inst) {
Fail = true;
return;
}
}
}

ArrayRef<Instruction *> operator*() const { return Insts; }
};

} // end anonymous namespace

/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matchin
/// instructions only In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
bool EqTermsOnly) {
bool AllInstsEqOnly) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(N1*N2*...) situations here where Ni are the sizes of these successors. As
Expand Down Expand Up @@ -1807,17 +1878,39 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
}

// Check if only hoisting terminators is allowed. This does not add new
// instructions to the hoist location.
if (EqTermsOnly) {
// Skip any debug intrinsics, as they are free to hoist.
for (auto &SuccIter : make_first_range(SuccIterPairs)) {
auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
if (!INonDbg->isTerminator())
return false;
if (AllInstsEqOnly) {
// Check if all instructions in the successor blocks match. This allows
// hoisting all instructions and removing the blocks we are hoisting from,
// so does not add any new instructions.
bool AllSame = true;
SmallVector<BasicBlock *> Succs = to_vector(successors(BB));
// Check if sizes and terminators of all successors match.
if (any_of(Succs, [&Succs](BasicBlock *Succ) {
Instruction *Term0 = Succs[0]->getTerminator();
Instruction *Term = Succ->getTerminator();
if (!Term->isSameOperationAs(Term0) ||
!equal(Term->operands(), Term0->operands()))
return true;
return Succs[0]->size() != Succ->size();
})) {
AllSame = false;
} else {
LockstepReverseIterator LRI(Succs);
while (LRI.isValid()) {
Instruction *I0 = (*LRI)[0];
if (any_of(*LRI, [I0](Instruction *I) {
return !areIdenticalUpToCommutativity(I0, I);
})) {
AllSame = false;
break;
}
--LRI;
}
}
// Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
if (!AllSame)
return false;
// Now we know that all instructions in all successors can be hoisted Let
// the loop below handle the hoisting.
}

// Count how many instructions were not hoisted so far. There's a limit on how
Expand Down Expand Up @@ -2350,81 +2443,6 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
}

namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// For example (assume all blocks have size n):
// LockstepReverseIterator I([B1, B2, B3]);
// *I-- = [B1[n], B2[n], B3[n]];
// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
// ...
class LockstepReverseIterator {
ArrayRef<BasicBlock*> Blocks;
SmallVector<Instruction*,4> Insts;
bool Fail;

public:
LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
reset();
}

void reset() {
Fail = false;
Insts.clear();
for (auto *BB : Blocks) {
Instruction *Inst = BB->getTerminator();
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
if (!Inst) {
// Block wasn't big enough.
Fail = true;
return;
}
Insts.push_back(Inst);
}
}

bool isValid() const {
return !Fail;
}

void operator--() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
// Already at beginning of block.
if (!Inst) {
Fail = true;
return;
}
}
}

void operator++() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getNextNode();
// Already at end of block.
if (!Inst) {
Fail = true;
return;
}
}
}

ArrayRef<Instruction*> operator * () const {
return Insts;
}
};

} // end anonymous namespace

/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
Expand Down
21 changes: 6 additions & 15 deletions llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,10 @@ define i32 @foo(i32 %i) nounwind ssp !dbg !0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: #dbg_value(i32 [[I:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META8:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 0, [[META9:![0-9]+]], !DIExpression(), [[META11:![0-9]+]])
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], 0, !dbg [[DBG12:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg [[DBG12]]
; CHECK: then:
; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG13:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[DBG13]])
; CHECK-NEXT: br label [[EXIT:%.*]], !dbg [[DBG15:![0-9]+]]
; CHECK: else:
; CHECK-NEXT: [[CALL_2:%.*]] = call i32 (...) @bar(), !dbg [[DBG16:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_2]], [[META9]], !DIExpression(), [[DBG16]])
; CHECK-NEXT: br label [[EXIT]], !dbg [[DBG18:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[K_0:%.*]] = phi i32 [ [[CALL_1]], [[THEN]] ], [ [[CALL_2]], [[ELSE]] ]
; CHECK-NEXT: ret i32 [[K_0]], !dbg [[DBG19:![0-9]+]]
; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG12:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META13:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META15:![0-9]+]])
; CHECK-NEXT: ret i32 [[CALL_1]], !dbg [[DBG17:![0-9]+]]
;
entry:
call void @llvm.dbg.value(metadata i32 %i, metadata !6, metadata !DIExpression()), !dbg !7
Expand All @@ -46,8 +37,8 @@ define i1 @hoist_with_debug2(i32 %x) !dbg !22 {
; CHECK-LABEL: @hoist_with_debug2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ugt i32 [[X:%.*]], 2
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21]], !DIExpression(), [[META23]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19:![0-9]+]], !DIExpression(), [[META21:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19]], !DIExpression(), [[META21]])
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true
; CHECK-NEXT: ret i1 [[DOT]]
;
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@ define i64 @hoist_load_with_matching_pointers_and_tbaa(i1 %c) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT: call void @init(ptr [[TMP]])
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]]
; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: ret i64 [[P]]
;
entry:
Expand Down Expand Up @@ -74,11 +70,7 @@ define i64 @hoist_load_with_different_tbaa(i1 %c) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT: call void @init(ptr [[TMP]])
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]]
; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT: ret i64 [[P]]
;
entry:
Expand Down Expand Up @@ -135,3 +127,11 @@ exit:
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
!5 = !{!3, !3, i64 0}
;.
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"p2 long long", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
; CHECK: [[TBAA5]] = !{[[META3]], [[META3]], i64 0}
;.

0 comments on commit fe2dd4b

Please sign in to comment.