llvm · david-arm · Sep 19, 2024 · Sep 2, 2024 · Sep 10, 2024 · Sep 16, 2024
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -377,6 +377,24 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
+  /// Returns true if the loop has a speculative early exit, i.e. an
+  /// uncountable exit that isn't the latch block.
+  bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
+
+  /// Returns the speculative early exiting block.
+  BasicBlock *getSpeculativeEarlyExitingBlock() const {
+    assert(getUncountableExitingBlocks().size() == 1 &&
+           "Expected only a single uncountable exiting block");
+    return getUncountableExitingBlocks()[0];
+  }
+
+  /// Returns the destination of a speculative early exiting block.
+  BasicBlock *getSpeculativeEarlyExitBlock() const {
+    assert(getUncountableExitBlocks().size() == 1 &&
+           "Expected only a single uncountable exit block");
+    return getUncountableExitBlocks()[0];
+  }
+
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
   bool isMaskRequired(const Instruction *I) const {
@@ -404,6 +422,22 @@ class LoopVectorizationLegality {
 
   DominatorTree *getDominatorTree() const { return DT; }
 
+  /// Returns all exiting blocks with a countable exit, i.e. the
+  /// exit-not-taken count is known exactly at compile time.
+  const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const {
+    return CountableExitingBlocks;
+  }
+
+  /// Returns all the exiting blocks with an uncountable exit.
+  const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
+    return UncountableExitingBlocks;
+  }
+
+  /// Returns all the exit blocks from uncountable exiting blocks.
+  SmallVector<BasicBlock *, 4> getUncountableExitBlocks() const {
+    return UncountableExitBlocks;
+  }
+
 private:
   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
   /// its nested loops are considered legal for vectorization. These legal
@@ -446,6 +480,23 @@ class LoopVectorizationLegality {
   /// specific checks for outer loop vectorization.
   bool canVectorizeOuterLoop();
 
+  /// Returns true if this is an early exit loop that can be vectorized.
+  /// Currently, a loop with an uncountable early exit is considered
+  /// vectorizable if:
+  ///   1. There are no writes to memory in the loop.
+  ///   2. The loop has only one early uncountable exit
+  ///   3. The early exit block dominates the latch block.
+  ///   4. The latch block has an exact exit count.
+  ///   5. The loop does not contain reductions or recurrences.
+  ///   6. We can prove at compile-time that loops will not contain faulting
+  ///   loads.
+  ///   7. It is safe to speculatively execute instructions such as divide or
+  ///   call instructions.
+  /// The list above is not based on theoretical limitations of vectorization,
+  /// but simply a statement that more work is needed to support these
+  /// additional cases safely.
+  bool isVectorizableEarlyExitLoop();
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -551,6 +602,17 @@ class LoopVectorizationLegality {
   /// (potentially) make a better decision on the maximum VF and enable
   /// the use of those function variants.
   bool VecCallVariantsFound = false;
+
+  /// Indicates whether this loop has a speculative early exit, i.e. an
+  /// uncountable exiting block that is not the latch.
+  bool HasSpeculativeEarlyExit = false;
+
+  /// Keep track of all the loop exiting blocks.
+  SmallVector<BasicBlock *, 4> CountableExitingBlocks;
+  SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
+
+  /// Keep track of the destinations of all uncountable exits.
+  SmallVector<BasicBlock *, 4> UncountableExitBlocks;
 };
 
 } // namespace llvm

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1445,6 +1445,145 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
+bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
+  BasicBlock *LatchBB = TheLoop->getLoopLatch();
+  if (!LatchBB) {
+    reportVectorizationFailure("Loop does not have a latch",
+                               "Cannot vectorize early exit loop",
+                               "NoLatchEarlyExit", ORE, TheLoop);
+    return false;
+  }
+
+  if (Reductions.size() || FixedOrderRecurrences.size()) {
+    reportVectorizationFailure(
+        "Found reductions or recurrences in early-exit loop",
+        "Cannot vectorize early exit loop with reductions or recurrences",
+        "RecurrencesInEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  TheLoop->getExitingBlocks(ExitingBlocks);
+
+  // Keep a record of all the exiting blocks.
+  SmallVector<const SCEVPredicate *, 4> Predicates;
+  for (BasicBlock *BB1 : ExitingBlocks) {
+    const SCEV *EC =
+        PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates);
+    if (isa<SCEVCouldNotCompute>(EC)) {
+      UncountableExitingBlocks.push_back(BB1);
+
+      SmallVector<BasicBlock *, 2> Succs(successors(BB1));
+      if (Succs.size() != 2) {
+        reportVectorizationFailure(
+            "Early exiting block does not have exactly two successors",
+            "Incorrect number of successors from early exiting block",
+            "EarlyExitTooManySuccessors", ORE, TheLoop);
+        return false;
+      }
+
+      BasicBlock *BB2;
+      if (!TheLoop->contains(Succs[0]))
+        BB2 = Succs[0];
+      else {
+        assert(!TheLoop->contains(Succs[1]));
+        BB2 = Succs[1];
+      }
+      UncountableExitBlocks.push_back(BB2);
+    } else
+      CountableExitingBlocks.push_back(BB1);
+  }
+  Predicates.clear();
+
+  // We only support one uncountable early exit.
+  if (getUncountableExitingBlocks().size() != 1) {
+    reportVectorizationFailure(
+        "Loop has too many uncountable exits",
+        "Cannot vectorize early exit loop with more than one early exit",
+        "TooManyUncountableEarlyExits", ORE, TheLoop);
+    return false;
+  }
+
+  // The only supported early exit loops so far are ones where the early
+  // exiting block is a unique predecessor of the latch block.
+  BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
+  if (LatchPredBB != getSpeculativeEarlyExitingBlock()) {
+    reportVectorizationFailure("Early exit is not the latch predecessor",
+                               "Cannot vectorize early exit loop",
+                               "EarlyExitNotLatchPredecessor", ORE, TheLoop);
+    return false;
+  }
+
+  // Check to see if there are instructions that could potentially generate
+  // exceptions or have side-effects.
+  auto IsSafeOperation = [](Instruction *I) -> bool {
+    switch (I->getOpcode()) {
+    case Instruction::Load:
+    case Instruction::Store:
+    case Instruction::PHI:
+    case Instruction::Br:
+      // These are checked separately.
+      return true;
+    default:
+      return isSafeToSpeculativelyExecute(I);
+    }
+  };
+
+  for (auto *BB : TheLoop->blocks())
+    for (auto &I : *BB) {
+      if (I.mayWriteToMemory()) {
+        // We don't support writes to memory.
+        reportVectorizationFailure(
+            "Writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with writes to memory",
+            "WritesInEarlyExitLoop", ORE, TheLoop);
+        return false;
+      } else if (!IsSafeOperation(&I)) {
+        reportVectorizationFailure("Early exit loop contains operations that "
+                                   "cannot be speculatively executed",
+                                   "Early exit loop contains operations that "
+                                   "cannot be speculatively executed",
+                                   "UnsafeOperationsEarlyExitLoop", ORE,
+                                   TheLoop);
+        return false;
+      }
+    }
+
+  // The latch block must have a countable exit.
+  if (isa<SCEVCouldNotCompute>(
+          PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
+    reportVectorizationFailure(
+        "Cannot determine exact exit count for latch block",
+        "Cannot vectorize early exit loop",
+        "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // The vectoriser cannot handle loads that occur after the early exit block.
+  assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() &&
+         "Expected latch predecessor to be the early exiting block");
+
+  // TODO: Handle loops that may fault.
+  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
+    reportVectorizationFailure(
+        "Loop may fault",
+        "Cannot vectorize potentially faulting early exit loop",
+        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs()
+      << "LV: Found an early exit. Retrying with speculative exit count.\n");
+  const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount();
+  assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
+         "Failed to get symbolic expression for backedge taken count");
+
+  LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
+                    << *SpecExitCount << '\n');
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   // Store the result and return it at the end instead of exiting early, in case
   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
@@ -1505,6 +1644,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  HasSpeculativeEarlyExit = false;
+  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
+    if (!isVectorizableEarlyExitLoop()) {
+      if (DoExtraAnalysis)
+        Result = false;
+      else
+        return false;
+    } else
+      HasSpeculativeEarlyExit = true;
+  }
+
   // Go over each instruction and look at memory deps.
   if (!canVectorizeMemory()) {
     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
@@ -1514,16 +1664,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
-    reportVectorizationFailure("could not determine number of loop iterations",
-                               "could not determine number of loop iterations",
-                               "CantComputeNumberOfIterations", ORE, TheLoop);
-    if (DoExtraAnalysis)
-      Result = false;
-    else
-      return false;
-  }
-
   if (Result) {
     LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                       << (LAI->getRuntimePointerChecking()->Need

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9815,6 +9815,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
+  if (LVL.hasSpeculativeEarlyExit()) {
+    reportVectorizationFailure(
+        "Auto-vectorization of early exit loops is not yet supported.",
+        "Auto-vectorization of early exit loops is not yet supported.",
+        "EarlyExitLoopsUnsupported", ORE, L);
+    return false;
+  }
+
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
   // even evaluating whether vectorization is profitable. Since we cannot modify