-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoopVectorize] Teach LoopVectorizationLegality about more early exits #107004
Changes from all commits
ec58810
d0cf2c6
0087f54
64942a3
2ee84a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -377,6 +377,24 @@ class LoopVectorizationLegality { | |
return LAI->getDepChecker().getMaxSafeVectorWidthInBits(); | ||
} | ||
|
||
/// Returns true if the loop has a speculative early exit, i.e. an | ||
/// uncountable exit that isn't the latch block. | ||
bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; } | ||
|
||
/// Returns the speculative early exiting block, i.e. the single recorded | |
/// exiting block whose exit is uncountable. Asserts that exactly one such | |
/// block has been recorded. | |
BasicBlock *getSpeculativeEarlyExitingBlock() const { | |
const SmallVector<BasicBlock *, 4> &ExitingBBs = | |
getUncountableExitingBlocks(); | |
assert(ExitingBBs.size() == 1 && | |
"Expected only a single uncountable exiting block"); | |
return ExitingBBs[0]; | |
} | |
|
||
/// Returns the destination of the speculative early exiting block, i.e. the | |
/// single recorded exit block reached through an uncountable exit. Asserts | |
/// that exactly one such exit block has been recorded. | |
BasicBlock *getSpeculativeEarlyExitBlock() const { | |
// Read the member directly so that no temporary copy of the vector is made | |
// just to inspect its single element. | |
assert(UncountableExitBlocks.size() == 1 && | |
"Expected only a single uncountable exit block"); | |
return UncountableExitBlocks[0]; | |
} | |
|
||
/// Returns true if vector representation of the instruction \p I | ||
/// requires mask. | ||
bool isMaskRequired(const Instruction *I) const { | ||
|
@@ -404,6 +422,22 @@ class LoopVectorizationLegality { | |
|
||
/// Returns the dominator tree pointer (DT) held by this object. | |
DominatorTree *getDominatorTree() const { | |
return DT; | |
} | |
|
||
/// Returns the exiting blocks that were classified as countable, i.e. those | |
/// for which a (predicated) exit count could be computed. The vector is a | |
/// member filled in by isVectorizableEarlyExitLoop() and is handed out by | |
/// const reference, so no copy is made. | |
const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const { | |
return CountableExitingBlocks; | |
} | |
|
||
/// Returns the exiting blocks that were classified as uncountable, i.e. those | |
/// for which no (predicated) exit count could be computed. The vector is a | |
/// member filled in by isVectorizableEarlyExitLoop() and is handed out by | |
/// const reference, so no copy is made. | |
const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const { | |
return UncountableExitingBlocks; | |
} | |
|
||
/// Returns the exit blocks, i.e. the out-of-loop successors, of all | |
/// uncountable exiting blocks. Handed out by const reference, like the other | |
/// exiting-block accessors, so that inspecting the list does not copy it. | |
const SmallVector<BasicBlock *, 4> &getUncountableExitBlocks() const { | |
return UncountableExitBlocks; | |
} | |
|
||
private: | ||
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all | ||
/// its nested loops are considered legal for vectorization. These legal | ||
|
@@ -446,6 +480,23 @@ class LoopVectorizationLegality { | |
/// specific checks for outer loop vectorization. | ||
bool canVectorizeOuterLoop(); | ||
|
||
/// Returns true if this is an early exit loop that can be vectorized. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would probably be helpful to summarize the required conditions here (or somewhere else) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
/// Currently, a loop with an uncountable early exit is considered | ||
/// vectorizable if: | ||
/// 1. There are no writes to memory in the loop. | ||
/// 2. The loop has only one early uncountable exit. | |
/// 3. The early exiting block is the unique predecessor of the latch block | |
/// (and therefore dominates it). | |
/// 4. The latch block has an exact exit count. | ||
/// 5. The loop does not contain reductions or recurrences. | ||
/// 6. We can prove at compile-time that loops will not contain faulting | ||
/// loads. | ||
/// 7. It is safe to speculatively execute instructions such as divide or | ||
/// call instructions. | ||
/// The list above is not based on theoretical limitations of vectorization, | ||
/// but simply a statement that more work is needed to support these | ||
/// additional cases safely. | ||
bool isVectorizableEarlyExitLoop(); | ||
|
||
/// Return true if all of the instructions in the block can be speculatively | ||
/// executed, and record the loads/stores that require masking. | ||
/// \p SafePtrs is a list of addresses that are known to be legal and we know | ||
|
@@ -551,6 +602,17 @@ class LoopVectorizationLegality { | |
/// (potentially) make a better decision on the maximum VF and enable | ||
/// the use of those function variants. | ||
bool VecCallVariantsFound = false; | ||
|
||
/// Indicates whether this loop has a speculative early exit, i.e. an | ||
/// uncountable exiting block that is not the latch. | ||
bool HasSpeculativeEarlyExit = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For consistency, with SCEV terminology and the variables below, |
||
|
||
/// Keep track of all the loop exiting blocks. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this comment accurate? AFAICT they only keep track of a single countable and uncountable exiting block, if the backedge taken count is not computable. |
||
SmallVector<BasicBlock *, 4> CountableExitingBlocks; | ||
SmallVector<BasicBlock *, 4> UncountableExitingBlocks; | ||
|
||
/// Keep track of the destinations of all uncountable exits. | ||
SmallVector<BasicBlock *, 4> UncountableExitBlocks; | ||
Comment on lines
+610
to
+615
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of the three only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, these will be used in follow-on patches. However, if you prefer to keep this initial patch clean I can remove them and add them later. I was just trying to reduce the size of follow-on patches by moving it into this patch that's all. |
||
}; | ||
|
||
} // namespace llvm | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1445,6 +1445,145 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG( | |
return Result; | ||
} | ||
|
||
/// Checks whether TheLoop is an early exit loop that this legality analysis | |
/// accepts. Each failed check is reported through reportVectorizationFailure | |
/// and makes the function return false immediately. The checks run in this | |
/// order: | |
///   - the loop has a latch block; | |
///   - no reductions or fixed-order recurrences have been recorded; | |
///   - every uncountable exiting block has exactly two successors; | |
///   - there is exactly one uncountable exiting block; | |
///   - that block is the unique predecessor of the latch; | |
///   - no instruction in the loop may write to memory, and every instruction | |
///     other than a load, store, PHI or branch passes | |
///     isSafeToSpeculativelyExecute; | |
///   - the latch has a computable (predicated) exit count; | |
///   - isDereferenceableReadOnlyLoop holds for the loop. | |
/// As a side effect the exiting blocks are appended to CountableExitingBlocks | |
/// and UncountableExitingBlocks, and the out-of-loop successor of each | |
/// uncountable exiting block is appended to UncountableExitBlocks. These | |
/// vectors are not cleared here and are not reset on a failing return. | |
bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { | |
BasicBlock *LatchBB = TheLoop->getLoopLatch(); | |
if (!LatchBB) { | |
reportVectorizationFailure("Loop does not have a latch", | |
"Cannot vectorize early exit loop", | |
"NoLatchEarlyExit", ORE, TheLoop); | |
return false; | |
} | |
|
||
// Loops for which reductions or fixed-order recurrences were recorded are | |
// rejected outright. | |
if (Reductions.size() || FixedOrderRecurrences.size()) { | |
reportVectorizationFailure( | |
"Found reductions or recurrences in early-exit loop", | |
"Cannot vectorize early exit loop with reductions or recurrences", | |
"RecurrencesInEarlyExitLoop", ORE, TheLoop); | |
return false; | |
} | |
|
||
SmallVector<BasicBlock *, 8> ExitingBlocks; | |
TheLoop->getExitingBlocks(ExitingBlocks); | |
|
||
// Classify every exiting block as countable or uncountable and keep a record | |
// of each one in the corresponding member vector. | |
// Predicates is only a scratch list handed to getPredicatedExitCount: it is | |
// cleared after the loop and is not otherwise read in this function. | |
SmallVector<const SCEVPredicate *, 4> Predicates; | |
for (BasicBlock *BB1 : ExitingBlocks) { | |
const SCEV *EC = | |
PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates); | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that's true because if the loop really does require predicates then There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I didn't see this parameter controlling any code paths but I do see an assert that would fail without it. What worried me was that you're clearing Predicates after iterating across all blocks and that made me assume it's contents didn't matter, but then I wondered why you need to manually clear it at all? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a comment here or below why predicates is passed and dropped below? |
||
if (isa<SCEVCouldNotCompute>(EC)) { | |
UncountableExitingBlocks.push_back(BB1); | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it matter if BB1 has no out-of-loop successors? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it's an exiting block at least one successor must be outside the loop. I've added an assert for this. |
||
|
||
SmallVector<BasicBlock *, 2> Succs(successors(BB1)); | |
if (Succs.size() != 2) { | |
reportVectorizationFailure( | |
"Early exiting block does not have exactly two successors", | |
"Incorrect number of successors from early exiting block", | |
"EarlyExitTooManySuccessors", ORE, TheLoop); | |
return false; | |
} | |
|
||
BasicBlock *BB2; | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. clearer to name ExitBlock? |
||
if (!TheLoop->contains(Succs[0])) | |
BB2 = Succs[0]; | |
else { | |
assert(!TheLoop->contains(Succs[1])); | |
BB2 = Succs[1]; | |
} | |
UncountableExitBlocks.push_back(BB2); | |
} else | |
CountableExitingBlocks.push_back(BB1); | |
} | |
Predicates.clear(); | |
|
||
// We only support one uncountable early exit. | |
if (getUncountableExitingBlocks().size() != 1) { | |
reportVectorizationFailure( | |
"Loop has too many uncountable exits", | |
"Cannot vectorize early exit loop with more than one early exit", | |
"TooManyUncountableEarlyExits", ORE, TheLoop); | |
return false; | |
} | |
|
||
// The only supported early exit loops so far are ones where the early | |
// exiting block is a unique predecessor of the latch block. | |
Comment on lines
+1507
to
+1508
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This answers my earlier question but that means the code in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor(); | |
if (LatchPredBB != getSpeculativeEarlyExitingBlock()) { | |
reportVectorizationFailure("Early exit is not the latch predecessor", | |
"Cannot vectorize early exit loop", | |
"EarlyExitNotLatchPredecessor", ORE, TheLoop); | |
return false; | |
} | |
|
||
// Check to see if there are instructions that could potentially generate | |
// exceptions or have side-effects. | |
auto IsSafeOperation = [](Instruction *I) -> bool { | |
switch (I->getOpcode()) { | |
case Instruction::Load: | |
case Instruction::Store: | |
case Instruction::PHI: | |
case Instruction::Br: | |
// These are checked separately. | |
return true; | |
default: | |
return isSafeToSpeculativelyExecute(I); | |
} | |
}; | |
|
||
for (auto *BB : TheLoop->blocks()) | |
for (auto &I : *BB) { | |
if (I.mayWriteToMemory()) { | |
// We don't support writes to memory. | |
reportVectorizationFailure( | |
"Writes to memory unsupported in early exit loops", | |
"Cannot vectorize early exit loop with writes to memory", | |
"WritesInEarlyExitLoop", ORE, TheLoop); | |
return false; | |
} else if (!IsSafeOperation(&I)) { | |
reportVectorizationFailure("Early exit loop contains operations that " | |
"cannot be speculatively executed", | |
"Early exit loop contains operations that " | |
"cannot be speculatively executed", | |
"UnsafeOperationsEarlyExitLoop", ORE, | |
TheLoop); | |
return false; | |
} | |
} | |
|
||
// The latch block must have a countable exit. | |
if (isa<SCEVCouldNotCompute>( | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move simpler checks earlier before iterating over the whole loop? Can also check if LatchBB is in the collected countable exiting blocks? |
||
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) { | |
reportVectorizationFailure( | |
"Cannot determine exact exit count for latch block", | |
"Cannot vectorize early exit loop", | |
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop); | |
return false; | |
} | |
|
||
// The vectoriser cannot handle loads that occur after the early exit block. | |
assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() && | |
"Expected latch predecessor to be the early exiting block"); | |
|
||
// TODO: Handle loops that may fault. | |
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) { | |
reportVectorizationFailure( | |
"Loop may fault", | |
"Cannot vectorize potentially faulting early exit loop", | |
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); | |
return false; | |
} | |
|
||
LLVM_DEBUG( | |
dbgs() | |
<< "LV: Found an early exit. Retrying with speculative exit count.\n"); | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be good to be consistent with SCEV terminology, i.e. symbolic max BTC. Or more specifically the computable exiting count for the loop latch, as with the current restrictions |
||
const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount(); | |
assert(!isa<SCEVCouldNotCompute>(SpecExitCount) && | |
"Failed to get symbolic expression for backedge taken count"); | |
|
||
LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: " | |
<< *SpecExitCount << '\n'); | |
return true; | |
} | |
|
||
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { | ||
// Store the result and return it at the end instead of exiting early, in case | ||
// allowExtraAnalysis is used to report multiple reasons for not vectorizing. | ||
|
@@ -1505,6 +1644,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { | |
return false; | ||
} | ||
|
||
HasSpeculativeEarlyExit = false; | ||
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) { | ||
if (!isVectorizableEarlyExitLoop()) { | ||
if (DoExtraAnalysis) | ||
Result = false; | ||
else | ||
return false; | ||
} else | ||
HasSpeculativeEarlyExit = true; | ||
} | ||
|
||
// Go over each instruction and look at memory deps. | ||
if (!canVectorizeMemory()) { | ||
LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); | ||
|
@@ -1514,16 +1664,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { | |
return false; | ||
} | ||
|
||
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) { | ||
reportVectorizationFailure("could not determine number of loop iterations", | ||
"could not determine number of loop iterations", | ||
"CantComputeNumberOfIterations", ORE, TheLoop); | ||
if (DoExtraAnalysis) | ||
Result = false; | ||
else | ||
return false; | ||
} | ||
|
||
if (Result) { | ||
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" | ||
<< (LAI->getRuntimePointerChecking()->Need | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both `getSpeculativeEarlyExitBlock` and `getUncountableExitingBlocks` assert that there's exactly one entry. Would it be simpler to store the pointers directly, rather than in a 1-element vector?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could do that, and would be happy to do so if people prefer. I wrote it this way because in future I thought it's possible we may support multiple early exits. @paulwalker-arm @huntergr-arm any preference?