diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 0f4d1355dd2bfe..091061442ae120 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -377,6 +377,24 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
+  /// Returns true if the loop has a speculative early exit, i.e. an
+  /// uncountable exit that isn't the latch block.
+  bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
+
+  /// Returns the speculative early exiting block.
+  BasicBlock *getSpeculativeEarlyExitingBlock() const {
+    assert(UncountableExitingBlocks.size() == 1 &&
+           "Expected only a single uncountable exiting block");
+    return UncountableExitingBlocks[0];
+  }
+
+  /// Returns the destination of a speculative early exiting block.
+  BasicBlock *getSpeculativeEarlyExitBlock() const {
+    assert(UncountableExitBlocks.size() == 1 &&
+           "Expected only a single uncountable exit block");
+    return UncountableExitBlocks[0];
+  }
+
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
   bool isMaskRequired(const Instruction *I) const {
@@ -404,6 +422,22 @@ class LoopVectorizationLegality {
 
   DominatorTree *getDominatorTree() const { return DT; }
 
+  /// Returns all exiting blocks with a countable exit, i.e. the
+  /// exit-not-taken count is known exactly at compile time.
+  const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const {
+    return CountableExitingBlocks;
+  }
+
+  /// Returns all the exiting blocks with an uncountable exit.
+  const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
+    return UncountableExitingBlocks;
+  }
+
+  /// Returns all the exit blocks from uncountable exiting blocks.
+  const SmallVector<BasicBlock *, 4> &getUncountableExitBlocks() const {
+    return UncountableExitBlocks;
+  }
+
 private:
   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
   /// its nested loops are considered legal for vectorization. These legal
@@ -446,6 +480,23 @@ class LoopVectorizationLegality {
   /// specific checks for outer loop vectorization.
   bool canVectorizeOuterLoop();
 
+  /// Returns true if this is an early exit loop that can be vectorized.
+  /// Currently, a loop with an uncountable early exit is considered
+  /// vectorizable if:
+  ///   1. There are no writes to memory in the loop.
+  ///   2. The loop has only one early uncountable exit.
+  ///   3. The early exiting block is the unique predecessor of the latch.
+  ///   4. The latch block has an exact exit count.
+  ///   5. The loop does not contain reductions or recurrences.
+  ///   6. We can prove at compile-time that loops will not contain faulting
+  ///      loads.
+  ///   7. It is safe to speculatively execute instructions such as divide or
+  ///      call instructions.
+  /// The list above is not based on theoretical limitations of vectorization,
+  /// but simply a statement that more work is needed to support these
+  /// additional cases safely.
+  bool isVectorizableEarlyExitLoop();
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -551,6 +602,17 @@ class LoopVectorizationLegality {
   /// (potentially) make a better decision on the maximum VF and enable
   /// the use of those function variants.
   bool VecCallVariantsFound = false;
+
+  /// Indicates whether this loop has a speculative early exit, i.e. an
+  /// uncountable exiting block that is not the latch.
+  bool HasSpeculativeEarlyExit = false;
+
+  /// Keep track of all the loop exiting blocks.
+  SmallVector<BasicBlock *, 4> CountableExitingBlocks;
+  SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
+
+  /// Keep track of the destinations of all uncountable exits.
+  SmallVector<BasicBlock *, 4> UncountableExitBlocks;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7062e21383a5fc..9645bd877fbf5e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1445,6 +1445,158 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
+bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
+  // Drop any exit information recorded by a previous query so that repeated
+  // legality checks do not accumulate stale blocks.
+  CountableExitingBlocks.clear();
+  UncountableExitingBlocks.clear();
+  UncountableExitBlocks.clear();
+
+  BasicBlock *LatchBB = TheLoop->getLoopLatch();
+  if (!LatchBB) {
+    reportVectorizationFailure("Loop does not have a latch",
+                               "Cannot vectorize early exit loop",
+                               "NoLatchEarlyExit", ORE, TheLoop);
+    return false;
+  }
+
+  if (!Reductions.empty() || !FixedOrderRecurrences.empty()) {
+    reportVectorizationFailure(
+        "Found reductions or recurrences in early-exit loop",
+        "Cannot vectorize early exit loop with reductions or recurrences",
+        "RecurrencesInEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  TheLoop->getExitingBlocks(ExitingBlocks);
+
+  // Classify every exiting block as countable or uncountable, and remember the
+  // out-of-loop successor of each uncountable one.
+  SmallVector<const SCEVPredicate *, 4> Predicates;
+  for (BasicBlock *ExitingBB : ExitingBlocks) {
+    const SCEV *EC =
+        PSE.getSE()->getPredicatedExitCount(TheLoop, ExitingBB, &Predicates);
+    if (!isa<SCEVCouldNotCompute>(EC)) {
+      CountableExitingBlocks.push_back(ExitingBB);
+      continue;
+    }
+
+    UncountableExitingBlocks.push_back(ExitingBB);
+
+    SmallVector<BasicBlock *, 2> Succs(successors(ExitingBB));
+    if (Succs.size() != 2) {
+      reportVectorizationFailure(
+          "Early exiting block does not have exactly two successors",
+          "Incorrect number of successors from early exiting block",
+          "EarlyExitTooManySuccessors", ORE, TheLoop);
+      return false;
+    }
+
+    // Pick the successor that lies outside the loop.
+    BasicBlock *ExitBB = TheLoop->contains(Succs[0]) ? Succs[1] : Succs[0];
+    assert(!TheLoop->contains(ExitBB) &&
+           "Expected an exiting block to have a successor outside the loop");
+    UncountableExitBlocks.push_back(ExitBB);
+  }
+  // The predicates gathered above were only needed to classify the exits.
+  Predicates.clear();
+
+  // We only support one uncountable early exit.
+  if (getUncountableExitingBlocks().size() != 1) {
+    reportVectorizationFailure(
+        "Loop has too many uncountable exits",
+        "Cannot vectorize early exit loop with more than one early exit",
+        "TooManyUncountableEarlyExits", ORE, TheLoop);
+    return false;
+  }
+
+  // The only supported early exit loops so far are ones where the early
+  // exiting block is a unique predecessor of the latch block.
+  BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
+  if (LatchPredBB != getSpeculativeEarlyExitingBlock()) {
+    reportVectorizationFailure("Early exit is not the latch predecessor",
+                               "Cannot vectorize early exit loop",
+                               "EarlyExitNotLatchPredecessor", ORE, TheLoop);
+    return false;
+  }
+
+  // Check to see if there are instructions that could potentially generate
+  // exceptions or have side-effects.
+  auto IsSafeOperation = [](Instruction *I) -> bool {
+    switch (I->getOpcode()) {
+    case Instruction::Load:
+    case Instruction::Store:
+    case Instruction::PHI:
+    case Instruction::Br:
+      // These are checked separately.
+      return true;
+    default:
+      return isSafeToSpeculativelyExecute(I);
+    }
+  };
+
+  for (BasicBlock *BB : TheLoop->blocks()) {
+    for (Instruction &I : *BB) {
+      if (I.mayWriteToMemory()) {
+        // We don't support writes to memory.
+        reportVectorizationFailure(
+            "Writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with writes to memory",
+            "WritesInEarlyExitLoop", ORE, TheLoop);
+        return false;
+      }
+
+      if (!IsSafeOperation(&I)) {
+        reportVectorizationFailure("Early exit loop contains operations that "
+                                   "cannot be speculatively executed",
+                                   "Early exit loop contains operations that "
+                                   "cannot be speculatively executed",
+                                   "UnsafeOperationsEarlyExitLoop", ORE,
+                                   TheLoop);
+        return false;
+      }
+    }
+  }
+
+  // The latch block must have a countable exit.
+  if (isa<SCEVCouldNotCompute>(
+          PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
+    reportVectorizationFailure(
+        "Cannot determine exact exit count for latch block",
+        "Cannot vectorize early exit loop",
+        "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // The vectoriser cannot handle loads that occur after the early exit block.
+  assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() &&
+         "Expected latch predecessor to be the early exiting block");
+
+  // TODO: Handle loops that may fault.
+  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
+    reportVectorizationFailure(
+        "Loop may fault",
+        "Cannot vectorize potentially faulting early exit loop",
+        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs()
+      << "LV: Found an early exit. Retrying with speculative exit count.\n");
+  // Only consumed by the assert and the debug output below, both of which
+  // compile away in release builds.
+  [[maybe_unused]] const SCEV *SpecExitCount =
+      PSE.getSymbolicMaxBackedgeTakenCount();
+  assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
+         "Failed to get symbolic expression for backedge taken count");
+
+  LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
+                    << *SpecExitCount << '\n');
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   // Store the result and return it at the end instead of exiting early, in case
   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
@@ -1505,6 +1657,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  HasSpeculativeEarlyExit = false;
+  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
+    if (!isVectorizableEarlyExitLoop()) {
+      if (DoExtraAnalysis)
+        Result = false;
+      else
+        return false;
+    } else
+      HasSpeculativeEarlyExit = true;
+  }
+
   // Go over each instruction and look at memory deps.
   if (!canVectorizeMemory()) {
     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
@@ -1514,16 +1677,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
-    reportVectorizationFailure("could not determine number of loop iterations",
-                               "could not determine number of loop iterations",
-                               "CantComputeNumberOfIterations", ORE, TheLoop);
-    if (DoExtraAnalysis)
-      Result = false;
-    else
-      return false;
-  }
-
   if (Result) {
     LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                       << (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0fa7c2add1faa2..804bcc51b960bf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9815,6 +9815,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
+  if (LVL.hasSpeculativeEarlyExit()) {
+    reportVectorizationFailure( +
"Auto-vectorization of early exit loops is not yet supported.", + "Auto-vectorization of early exit loops is not yet supported.", + "EarlyExitLoopsUnsupported", ORE, L); + return false; + } + // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before // even evaluating whether vectorization is profitable. Since we cannot modify diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index ac33f6e3e6f728..99911b251c81e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -12,7 +12,7 @@ ; } ; } ; File, line, and column should match those specified in the metadata -; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations +; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop ; CHECK: remark: source.cpp:5:9: loop not vectorized ; void test_disabled(int *A, int Length) { @@ -46,12 +46,12 @@ ; YAML: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: CantComputeNumberOfIterations +; YAML-NEXT: Name: EarlyExitNotLatchPredecessor ; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 5, Column: 9 } ; YAML-NEXT: Function: _Z4testPii ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'loop not vectorized: ' -; YAML-NEXT: - String: could not determine number of loop iterations +; YAML-NEXT: - String: Cannot vectorize early exit loop ; YAML-NEXT: ... ; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: loop-vectorize @@ -117,12 +117,12 @@ ; YAML-NEXT: ... 
; YAML-NEXT: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize -; YAML-NEXT: Name: CantComputeNumberOfIterations +; YAML-NEXT: Name: EarlyExitNotLatchPredecessor ; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } ; YAML-NEXT: Function: test_multiple_failures ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'loop not vectorized: ' -; YAML-NEXT: - String: could not determine number of loop iterations +; YAML-NEXT: - String: Cannot vectorize early exit loop ; YAML-NEXT: ... ; YAML: --- !Missed ; YAML-NEXT: Pass: loop-vectorize diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll index a27f2f0841bca8..3a8aec34dfe43e 100644 --- a/llvm/test/Transforms/LoopVectorize/control-flow.ll +++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll @@ -10,7 +10,7 @@ ; return 0; ; } -; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations +; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory ; CHECK: remark: source.cpp:5:9: loop not vectorized ; CHECK: _Z4testPii diff --git a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll index 2a5240e73c6f8b..46a4592d0208ee 100644 --- a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll @@ -3,7 +3,7 @@ ; Make sure LV does not crash when generating remarks for loops with non-unique ; exit blocks. 
define i32 @test_non_unique_exit_blocks(ptr nocapture readonly align 4 dereferenceable(1024) %data, i32 %x) { -; CHECK: loop not vectorized: could not determine number of loop iterations +; CHECK: loop not vectorized: Cannot vectorize early exit loop ; entry: br label %for.header diff --git a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll new file mode 100644 index 00000000000000..f905bcd73dc952 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll @@ -0,0 +1,1941 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize 2>%t | FileCheck %s --check-prefixes=CHECK +; RUN: cat %t | FileCheck %s --check-prefix=DEBUG + +declare void @init_mem(ptr, i64); + +define i64 @same_exit_block_pre_inc_use1() { +; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' +; DEBUG: LV: Found an early exit. Retrying with speculative exit count. +; DEBUG-NEXT: LV: Found speculative backedge taken count: 63 +; DEBUG-NEXT: LV: We can vectorize this loop! +; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported. 
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, 
%loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds [1024 x i8], ptr %p1, i64 0, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds [1024 x i8], ptr %p2, i64 0, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 
%index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [40 x i32], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [40 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [40 x i32] + %p2 = alloca [40 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, 
align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use2() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr 
%p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 67, %loop ], [ %index, %loop.inc ] + ret i64 %retval +} + +define i64 @same_exit_block_pre_inc_use3() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use3() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr 
%arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + ret i64 %index +} + + +; In this example the early exit block appears in the list of ExitNotTaken +; SCEVs, but is not computable. +define i64 @same_exit_block_pre_inc_use4() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i64] + %p2 = alloca [1024 x i64] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index + %ld1 = load i64, ptr %arrayidx, align 1 + %cmp3 = icmp ult i64 
%index, %ld1 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + + +define i64 @same_exit_block_post_inc_use() { +; CHECK-LABEL: define i64 @same_exit_block_post_inc_use() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index 
+ %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %index.next, %loop.inc ] + ret i64 %retval +} + +define i64 @same_exit_block_post_inc_use2() { +; CHECK-LABEL: define i64 @same_exit_block_post_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, 
align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %index.next = add i64 %index, 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index.next, %loop ], [ %index, %loop.inc ] + ret i64 %retval +} + +define i64 @same_exit_block_phi_of_consts() { +; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr 
%p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ] + ret i64 %retval +} + + +define i64 @diff_exit_block_pre_inc_use1() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void 
@init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ 67, %loop.inc ] + ret i64 %retval2 +} + +define i64 @diff_exit_block_pre_inc_use2() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: 
loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ 67, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index, %loop.inc ] + ret i64 %retval2 +} + +define i64 @diff_exit_block_pre_inc_use3() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use3() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne 
i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] +; CHECK: loop.end: +; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + ret i64 %index + +loop.end: + ret i64 %index +} + + +define i64 @diff_exit_block_phi_of_consts() { +; CHECK-LABEL: define i64 @diff_exit_block_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 
[[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: ret i64 0 +; CHECK: loop.end: +; CHECK-NEXT: ret i64 1 +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + ret i64 0 + +loop.end: + ret i64 1 +} + + +define i64 @diff_exit_block_post_inc_use1() { +; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 
[[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index.next, %loop.inc ] + ret i64 %retval2 +} + + +define i64 @diff_exit_block_post_inc_use2() { +; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 
[[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %index.next = add i64 %index, 1 + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index.next, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index, %loop.inc ] + ret i64 %retval2 +} + + +; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't +; support this yet. +define i64 @early_exit_on_last_block() { +; DEBUG-LABEL: LV: Checking a loop in 'early_exit_on_last_block' +; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor. 
+; CHECK-LABEL: define i64 @early_exit_on_last_block() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[SEARCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT]], label [[LAND_RHS]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ 64, [[LAND_RHS]] ], [ [[INDEX]], [[SEARCH]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %search ], [ 3, %entry ] + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop + +loop.end: + %retval = phi i64 [ 64, %loop ], [ %index, %search ] + 
ret i64 %retval +} + + +; There are multiple exiting blocks - two of them have an exact representation for +; the exit-not-taken counts and the other is unknown, i.e. the "early exit". +define i64 @multiple_exits_one_early() { +; CHECK-LABEL: define i64 @multiple_exits_one_early() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds
i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 64, %loop ], [ %index, %search ], [ 128, %loop.inc ] + ret i64 %retval +} + + +; We don't currently support multiple early exits. +define i64 @multiple_early_exits() { +; DEBUG-LABEL: LV: Checking a loop in 'multiple_early_exits' +; DEBUG: LV: Not vectorizing: Loop has too many uncountable exits. +; CHECK-LABEL: define i64 @multiple_early_exits() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: search1: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_INC:%.*]] +; CHECK: search2: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[TMP41]], 34 +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_END_LOOPEXIT]], label [[FOR_INC1]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label 
[[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 100, [[FOR_INC]] ], [ 43, [[FOR_INC1]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %search1 + +search1: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp eq i8 %ld1, %ld2 + br i1 %cmp1, label %loop.end, label %search2 + +search2: + %cmp2 = icmp ult i8 %ld1, 34 + br i1 %cmp2, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search1, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ] + ret i64 %retval +} + + +define i64 @early_exit_infinite_loop() { +; DEBUG-LABEL: LV: Checking a loop in 'early_exit_infinite_loop' +; DEBUG: LV: Not vectorizing: Cannot determine exact exit count for latch block. 
+; CHECK-LABEL: define i64 @early_exit_infinite_loop() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br label [[LAND_RHS]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br label %loop + +loop.end: + %retval = phi i64 [ %index, %loop ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use_inv_cond(i1 %cond) { +; 
CHECK-LABEL: define i64 @same_exit_block_pre_inc_use_inv_cond( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: [[CMP4:%.*]] = select i1 [[COND]], i1 [[CMP3]], i1 false +; CHECK-NEXT: br i1 [[CMP4]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + %cmp4 = select i1 %cond, i1 %cmp3, i1 false + br i1 %cmp4, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 
%index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_call() { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_call' +; DEBUG: LV: Found an early exit. Retrying with speculative exit count. +; DEBUG-NEXT: LV: Found speculative backedge taken count: 63 +; DEBUG-NEXT: LV: We can vectorize this loop! +; CHECK-LABEL: define i64 @loop_contains_safe_call() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index + %ld1 = load float, ptr %arrayidx, align 1 + %sqrt = tail call 
fast float @llvm.sqrt.f32(float %ld1) + %cmp = fcmp fast ult float %sqrt, 3.0e+00 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_call() { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call' +; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. +; CHECK-LABEL: define i64 @loop_contains_unsafe_call() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = 
getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %bad_call = call i32 @foo(i32 %ld1) #0 + %cmp = icmp eq i32 %bad_call, 34 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_div() { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_div' +; DEBUG: LV: Found an early exit. Retrying with speculative exit count. +; DEBUG-NEXT: LV: Found speculative backedge taken count: 63 +; DEBUG-NEXT: LV: We can vectorize this loop! +; CHECK-LABEL: define i64 @loop_contains_safe_div() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX2]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void 
@init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 %ld1, 20000 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_div() { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div' +; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. +; CHECK-LABEL: define i64 @loop_contains_unsafe_div() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 20000, [[LD1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void 
@init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 20000, %ld1 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_store(ptr %dest) { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_store' +; DEBUG: LV: Not vectorizing: Writes to memory unsupported in early exit loops +; CHECK-LABEL: define i64 @loop_contains_store( +; CHECK-SAME: ptr [[DEST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[LD1]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void 
@init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index + store i32 %ld1, ptr %arrayidx2, align 4 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' +; DEBUG: LV: Found an early exit. Retrying with speculative exit count. +; DEBUG-NEXT: LV: Found speculative backedge taken count: 63 +; DEBUG-NEXT: LV: We can vectorize this loop! +; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported. 
+; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit( +; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index + %ld2 = load i64, ptr %arrayidx2, align 8 + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ] + ret i64 %retval +} + + +define i64 @early_exit_in_conditional_block(ptr %mask) { +; DEBUG-LABEL: LV: Checking a loop in 
'early_exit_in_conditional_block' +; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor. +; CHECK-LABEL: define i64 @early_exit_in_conditional_block( +; CHECK-SAME: ptr [[MASK:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[MASK]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[LD1]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_SEARCH:%.*]], label [[LOOP_INC]] +; CHECK: loop.search: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD2]], [[LD3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP_SEARCH]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 
%index + %ld1 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp ne i8 %ld1, 0 + br i1 %cmp1, label %loop.search, label %loop.inc + +loop.search: + %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld3 = load i8, ptr %arrayidx3, align 1 + %cmp2 = icmp eq i8 %ld2, %ld3 + br i1 %cmp2, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_reverse() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_reverse() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 1023, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + 
%p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 1023, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, -1 + %exitcond = icmp eq i64 %index.next, 0 + br i1 %exitcond, label %loop.end, label %loop + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 1024, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_with_reduction() { +; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction' +; DEBUG: LV: Not vectorizing: Found reductions or recurrences in early-exit loop. +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_with_reduction() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[RED_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[LD2_ZEXT:%.*]] = zext i8 [[TMP39]] to i64 +; CHECK-NEXT: [[RED_NEXT]] = add i64 [[RED]], [[LD2_ZEXT]] +; CHECK-NEXT: [[CMP3:%.*]] 
= icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], [[FOR_INC]] ], [ [[RED_NEXT]], [[LAND_RHS]] ] +; CHECK-NEXT: [[FINAL_IND:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = add i64 [[RED_NEXT_LCSSA]], [[FINAL_IND]] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %ld2.zext = zext i8 %ld2 to i64 + %red.next = add i64 %red, %ld2.zext + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + %retval = add i64 %red.next, %final.ind + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p1, ptr dereferenceable(1024) %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(1024) [[P1:%.*]], ptr dereferenceable(1024) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 
[[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +; The form of the induction variables requires SCEV predicates. +; TODO: We should fix isDereferenceableAndAlignedInLoop and +; getSmallConstantMaxTripCount to cope with SCEV predicates when +; requesting the small constant max trip count. +define i32 @diff_exit_block_needs_scev_check(i32 %end) { +; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' +; DEBUG: LV: Not vectorizing: Loop may fault. 
+; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check( +; CHECK-SAME: i32 [[END:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 +; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK: found: +; CHECK-NEXT: ret i32 1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i32 %end, 1023 + br label %for.body + +for.body: + %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ] + %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds 
i32, ptr %p2, i64 %gep.ind + %1 = load i32, ptr %arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i8 %ind, 1 + %conv = zext i8 %ind.next to i32 + %gep.ind.next = add i64 %gep.ind, 1 + %cmp = icmp ult i32 %conv, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + + +declare void @abort() + +; This is a variant of an early exit loop where the condition for leaving +; early is loop invariant. +define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { +; DEBUG-LABEL: LV: Checking a loop in 'diff_blocks_invariant_early_exit_cond' +; DEBUG: LV: Found an early exit. Retrying with speculative exit count. +; DEBUG-NEXT: LV: Found speculative backedge taken count: 275 +; DEBUG: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported. +; CHECK-LABEL: define i32 @diff_blocks_invariant_early_exit_cond( +; CHECK-SAME: ptr [[S:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: early.exit: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +entry: + %sval = load i32, ptr %s, align 4 + %cond = icmp eq i32 %sval, 0 + br label %for.body + +for.body: + %ind = phi i32 [ -10, %entry ], [ %ind.next, %for.inc ] + br i1 %cond, label %for.inc, label %early.exit + +for.inc: + %ind.next = add nsw i32 %ind, 1 + %exitcond.not 
= icmp eq i32 %ind.next, 266 + br i1 %exitcond.not, label %for.end, label %for.body + +early.exit: + tail call void @abort() + unreachable + +for.end: + ret i32 0 +} + + +define i64 @early_exit_has_multiple_outside_successors() { +; DEBUG-LABEL: LV: Checking a loop in 'early_exit_has_multiple_outside_successors' +; DEBUG: LV: Not vectorizing: Loop contains an unsupported switch +; CHECK-LABEL: define i64 @early_exit_has_multiple_outside_successors() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: switch i8 [[LD1]], label [[LOOP_INC]] [ +; CHECK-NEXT: i8 2, label [[LOOP_END:%.*]] +; CHECK-NEXT: i8 3, label [[LOOP_SURPRISE:%.*]] +; CHECK-NEXT: ] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.surprise: +; CHECK-NEXT: ret i64 3 +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + switch i8 %ld1, label %loop.inc [ + i8 2, label %loop.end + i8 3, label %loop.surprise + ] + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.surprise: + ret i64 3 + 
+loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { +; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas' +; DEBUG: LV: Not vectorizing: Loop may fault. +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = 
icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = 
icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + + +declare i32 @foo(i32) readonly +declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>) +
+attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } ; Attribute group #0: names @foo_vec as the vector variant of scalar @foo. NOTE(review): per the VFABI mangling scheme "_ZGVsNxv" should read as SVE ISA (s), unmasked (N), scalable VF (x), one vector parameter (v) - confirm against LangRef. No use of #0 is visible in this part of the file.