[SCEVExpander] Expand UDiv avoiding UB when in seq_min/max. #92177

fhahn · 2024-05-14T21:04:40Z

SCEVExpander to introduce an SafeUDivMode, which is set
when expanding operands of SCEVSequentialMinMaxExpr. In this mode,
the expander will make sure that the divisor of the expanded UDiv is neither
0 nor poison.

Fixes #89958.

llvmbot · 2024-05-14T21:05:11Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

Changes

Introduce a utility to check if a SCEV expansion may introduce UB (couldn't find a similar utility after a quick glance) and use to the avoid vectorizing when expanding the trip count introduces UB.

Fixes #89958.

Full diff: https://github.com/llvm/llvm-project/pull/92177.diff

4 Files Affected:

(modified) llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h (+4)
(modified) llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp (+5)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+26)
(modified) llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll (+6-94)

diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 62c1e15a9a60e..270286afebd2a 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -418,6 +418,10 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
   BasicBlock::iterator findInsertPointAfter(Instruction *I,
                                             Instruction *MustDominate) const;
 
+  /// Returns true if expanding \p Expr may introduce UB (e.g. because the
+  /// expression contains an UDiv expression).
+  static bool expansionMayIntroduceUB(const SCEV *Expr);
+
 private:
   LLVMContext &getContext() const { return SE.getContext(); }
 
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0feea0a4233cd..0a5ec8baccbee 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -161,6 +161,11 @@ SCEVExpander::findInsertPointAfter(Instruction *I,
   return IP;
 }
 
+bool SCEVExpander::expansionMayIntroduceUB(const SCEV *Expr) {
+  return SCEVExprContains(Expr,
+                          [](const SCEV *Op) { return isa<SCEVUDivExpr>(Op); });
+}
+
 BasicBlock::iterator
 SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
   // Cast the argument at the beginning of the entry block, after
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 9de49d1bcfeac..e1b1ad083f184 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
 
@@ -1506,6 +1507,31 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  SmallVector<BasicBlock *> Exiting;
+  TheLoop->getExitingBlocks(Exiting);
+  // Check if eexit count for any exit that may execute unconditionally may in
+  // introduce UB. Note that we can skip checks in the header or if there's a
+  // single exit, as in those cases we know that the exit count will be
+  // evaluated in each loop iteration. There are other cases where the exiting
+  // block executes on each loop iteration, but we don't have a cheap way to
+  // check at the moment.
+  ScalarEvolution &SE = *PSE.getSE();
+  if (Exiting.size() > 1 && any_of(Exiting, [this, &SE](BasicBlock *E) {
+        if (TheLoop->getHeader() == E)
+          return false;
+        const SCEV *EC = SE.getExitCount(TheLoop, E);
+        return !isa<SCEVCouldNotCompute>(EC) &&
+               SCEVExpander::expansionMayIntroduceUB(EC);
+      })) {
+    LLVM_DEBUG(
+        dbgs()
+        << "LV: Expanding the trip count expression may introduce UB.\n");
+    if (DoExtraAnalysis)
+      Result = false;
+    else
+      return false;
+  }
+
   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                     << (LAI->getRuntimePointerChecking()->Need
                             ? " (with a runtime bound check)"
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index 85fad6fb36328..168c0f88e4fc0 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -74,66 +74,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_in_block_executed_unconditionally(
 ; CHECK-LABEL: define i64 @multi_exit_2_exit_count_with_udiv_in_block_executed_unconditionally(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 42, [[N]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
-; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP9]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
-; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
-; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; CHECK:       pred.store.if1:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP12]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
-; CHECK:       pred.store.continue2:
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
-; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK:       pred.store.if3:
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP15]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
-; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
-; CHECK:       pred.store.if5:
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP18]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
-; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    br label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i32 [[L]], 10
@@ -148,7 +91,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_in_block_executed_unconditionally(
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
-; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_HEADER]], label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[P:%.*]] = phi i64 [ 1, [[CONTINUE]] ], [ 0, [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    ret i64 [[P]]
@@ -232,40 +175,13 @@ exit:
   ret i64 %p
 }
 
-; FIXME: Currently miscompiled as we unconditionally execute udiv after
-; vectorization.
 define i64 @multi_exit_4_exit_count_with_udiv_in_latch(ptr %dst, i64 %N) {
 ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_in_latch(
 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 42, [[N]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    br label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
 ; CHECK-NEXT:    store i32 1, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
@@ -274,7 +190,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_in_latch(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[D:%.*]] = udiv i64 42, [[N]]
 ; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
-; CHECK-NEXT:    br i1 [[C_1]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[C_1]], label [[LOOP_HEADER]], label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[P:%.*]] = phi i64 [ 1, [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    ret i64 [[P]]
@@ -320,7 +236,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -334,7 +250,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[D:%.*]] = udiv i64 42, [[N]]
 ; CHECK-NEXT:    [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
-; CHECK-NEXT:    br i1 [[C_1]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[C_1]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
@@ -361,8 +277,4 @@ exit:
 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
 ;.

efriedma-quic · 2024-05-14T21:29:09Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

@@ -161,6 +161,11 @@ SCEVExpander::findInsertPointAfter(Instruction *I,
  return IP;
 }

+bool SCEVExpander::expansionMayIntroduceUB(const SCEV *Expr) {
+  return SCEVExprContains(Expr,
+                          [](const SCEV *Op) { return isa<SCEVUDivExpr>(Op); });


Is this the same as SafeToHoist in SCEVExpander::expand?

We could probably do something more clever if it mattered: the only way for the exit count to contain UB is if the branch never executes, so it would be correct to just transform a/b to a/umax(b, 1).

This is SCEVExpander::isSafeToExpand(). (I think the SafeToHoist stuff is some half-dead leftover to handle call-sites that fail to call isSafeToExpand.)

Updated the PR to also check if the divisor is guaranteed to be non-zero. This may miss some safe cases, e.g. where the division happens outside the loop already. Not sure if we have any good way of checking that, except possibly the checks if there are existing equivalent instructions outside the loop during expansion?

We could probably do something more clever if it mattered: the only way for the exit count to contain UB is if the branch never executes, so it would be correct to just transform a/b to a/umax(b, 1).

I think in some cases this may not be correct, e.g. we have two exit counts 1) c and 2) a - (42/b), with the BTC being umin(c, a - (42/b)). If b == 0 then the BTC should be c, but using 1 as divisor may result in a - (42/(umax 1, b)) < c and we would use an incorrect BTC?

e.g. we have two exit counts 1) c and 2) a - (42/b), with the BTC being umin(c, a - (42/b)). If b == 0 then the BTC should be c, but using 1 as divisor may result in a - (42/(umax 1, b)) < c and we would use an incorrect BTC?

In the given example, c has to be zero, I think? If c isn't zero, and b is zero, we divide by zero even before any optimizations run.

Updated the PR to also check if the divisor is guaranteed to be non-zero.

Can we not use isSafeToExpand() for some reason?

Sorry for just coming back to this now!

In the given example, c has to be zero, I think? If c isn't zero, and b is zero, we divide by zero even before any optimizations run.

Ah yes, updated to rewrite the expression, thanks!

Can we not use isSafeToExpand() for some reason?

Updated as well in the latest version, thanks!

There's one more case here I wasn't thinking about: udiv where the RHS is poison. umax isn't sufficient in that case because umax(1,poison) is still poison,

Add additional tests with udiv/urem/sdiv/srem in trip counts, where the divisor is constant. For #92177.

efriedma-quic · 2024-06-28T21:08:55Z

llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h

+  /// If \p L contains exits which may execute conditionally and contain UDiv
+  /// expressions with divisors that can be 0, expanding \p BTC may introduce
+  /// new UB. In that case, rewrite UDiv(A, B) to  UDiv(A, UMAX(1, B)). If B is
+  /// 0, that exit cannot be taken.


Does this assume the loop eventually exits? If it does, probably worth explicitly noting.

efriedma-quic · 2024-06-28T21:10:09Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

@@ -161,6 +161,11 @@ SCEVExpander::findInsertPointAfter(Instruction *I,
  return IP;
 }

+bool SCEVExpander::expansionMayIntroduceUB(const SCEV *Expr) {
+  return SCEVExprContains(Expr,
+                          [](const SCEV *Op) { return isa<SCEVUDivExpr>(Op); });


There's one more case here I wasn't thinking about: udiv where the RHS is poison. umax isn't sufficient in that case because umax(1,poison) is still poison,

fhahn · 2024-07-01T16:23:48Z

(replying in main thread, as the comment seems to be gone from the web interface, at least I cannot respond to it any longer)

There's one more case here I wasn't thinking about: udiv where the RHS is poison. umax isn't sufficient in that case because umax(1,poison) is still poison,

Ah yes! In those cases, we need to freeze RHS, but I don't think we can enforce this via a SCEV rewrite, but need to tell this to the expander

efriedma-quic · 2024-09-05T21:40:44Z

What's the status here?

fhahn · 2024-09-06T20:18:20Z

What's the status here?

Thanks for the reminder. Just updated the PR to introduce a SafeUDivMode to SCEVExpander and plug through the information if it is needed to the point where the trip count gets expanded. Not sure if there's a better way to ensure the divisor is frozen when needed

fhahn · 2024-09-13T17:47:51Z

ping :) @efriedma-quic WDYT about the latest update?

efriedma-quic · 2024-09-17T00:30:37Z

llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll

@@ -527,6 +528,76 @@ exit:
  ret i64 %p
 }

+define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, i64 %N) {


I think SCEV is actually computing the trip count wrong here. It currently produces ((42 /u %fr.N) umin (0 smax %N2))... but I think it should actually be ((0 smax %N2) umin_seq (42 /u %fr.N)). And that should be a well-defined if %N2 is zero, even if we would divide by zero without the umin_seq. (I don't think anyone was thinking about divide-by-zero when https://reviews.llvm.org/D116766 was originally proposed, but it seems like a natural extension.)

Under those semantics, we shouldn't need to modify VPlan to examine the exits. SCEVExpander itself should enable SafeUDivMode when it evaluates operands of umin_seq after the first.

(The relevant code on the SCEV side is ScalarEvolution::getSequentialMinMaxExpr; it "optimizes" umin_seq to umin.)

So the proposal here is to change the semantics of umin_seq from a select to a conditional branch, so it not only prevents poison propagation but also UB propagation?

Does that really cover all cases? What if we have one exit with a udiv exit count, but also a prior abnormal exit? Maybe not relevant to vectorization, but thinking more generally here.

I'm not sure there's really a useful distinction between an abnormal exit inside the loop, vs. an abnormal exit in the loop preheader. Either way, if you want to hoist the trip count past such an operation, you need to take additional measures beyond what SCEV normally provides. You need safe-udiv for all udiv operations, and if you want to use the value for anything, you need to freeze it.

For simple vectorization, we should be able to assume the loop exits normally, and we want to use that information to avoid unnecessary freeze operations. So we need to distinguish which freeze/safe-div operations are necessary. The way we currently figure out which operands we need to freeze is via umin_seq; I think it makes sense to figure out which operands need safe-div the same way.

Updated the PR to use umin_seq in the cases as suggested I think; the main additional change is to forbid simplifying A umin_seq B -> A umin B, if B may trigger UB (at the moment just SCEVUDivExpr?). This could be split off separately + SCEV printing test. Just want to make sure this is what you had in mind before doing so.

(There are some redundant freezes of the UDiv result which we could get rid of)

Looking at this again, I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing; as the expanded expression is used for a branch, if %N is poison, the branch on poison will also trigger UB, just slightly later. Granted at the SCEV & expander level, we don't really know if that will be the case. @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bound uses 2 different bounds for the exits, so here the freeze of the divisor is definitely needed

Yes, this is what I was thinking, roughly.

I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing

We could probably refine this in SCEVExpander if we end up generating a bunch of freeze in inconvenient places (basically, make a map of SCEVUnknowns which are used without freezing them, and then don't freeze if we see them elsewhere).

Additional tests for #89958, following discussion at #92177.

Introduce a utility to check if a SCEV expansion may introduce UB (couldn't find a similar utility after a quick glance) and use to the avoid vectorizing when expanding the trip count introduces UB. Fixes llvm#89958. !fixup introduce SafeUDivMode to SCEVExpander. Step

efriedma-quic · 2024-09-26T21:22:45Z

llvm/lib/Analysis/ScalarEvolution.cpp

@@ -4304,12 +4304,19 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
  }

  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+    bool MayBeUB = SCEVExprContains(Ops[i], [](const SCEV *S) {
+      auto *UDiv = dyn_cast<SCEVUDivExpr>(S);
+      return UDiv && !isa<SCEVConstant>(UDiv->getOperand(1));


Need to check for zero constant, I think.

Updated to check for !isKnownNonZero() and added comment, thanks!

efriedma-quic · 2024-09-26T21:24:03Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  if (IsSequential)
    LHS = Builder.CreateFreeze(LHS);
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+    SafeUDivMode = i != 0;


I think you need to save/restore the mode, in case you have nested min/max.

Updated to save the initial mode, and updated the code to not set it to false if it was already set.

And restored the original value on exit.

The earlier version was also enabling it if IsSequential was false. Also fixed

efriedma-quic · 2024-09-26T21:29:44Z

llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll

@@ -527,6 +528,76 @@ exit:
  ret i64 %p
 }

+define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, i64 %N) {


Yes, this is what I was thinking, roughly.

I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing

We could probably refine this in SCEVExpander if we end up generating a bunch of freeze in inconvenient places (basically, make a map of SCEVUnknowns which are used without freezing them, and then don't freeze if we see them elsewhere).

…roduce-ub

github-actions · 2024-09-27T20:43:02Z

✅ With the latest revision this PR passed the C/C++ code formatter.

fhahn · 2024-09-27T20:43:06Z

@efriedma-quic lost the ability to reply to the comment below inline

Yes, this is what I was thinking, roughly.

I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing
We could probably refine this in SCEVExpander if we end up generating a bunch of freeze in inconvenient places (basically, make a map of SCEVUnknowns which are used without freezing them, and then don't freeze if we see them elsewhere).

Sounds good as a potential follow-up? There are other cases where we also miss freezes I think

efriedma-quic

LGTM

SCEV-only tests for #92177

…roduce-ub

fhahn · 2024-10-02T10:09:02Z

@nikic WDYT about the added comment?

I also added SCEV-only tests, so the SCEV changes could be separated from the SCEVExpander changes if preferred

nikic · 2024-10-02T10:16:52Z

llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h

+///    I.e. given `0 umin_seq poison`, the result will be `0`,
+///    while the result of `0 umin poison` is `poison`.
+///  * if any operand may trigger UB, e.g. if there is an UDiv operand that may
+///    divide by 0.


These aren't really two separate cases. The "early return" happens "upon reaching the saturation point" in both cases. The clarification here is just that "early return" is in the sense that the RHS will not be executed at all (even if it has UB), rather than only that its value will not be used.

Thanks, should be updated in #110824

nikic · 2024-10-02T10:17:15Z

I also added SCEV-only tests, so the SCEV changes could be separated from the SCEVExpander changes if preferred

Yeah, I think it would be good to split off this change.

fhahn · 2024-10-02T10:57:06Z

I also added SCEV-only tests, so the SCEV changes could be separated from the SCEVExpander changes if preferred

Yeah, I think it would be good to split off this change.

Done in #110824

SCEV-only tests for llvm#92177

…roduce-ub

fhahn · 2024-10-14T12:59:13Z

Updated after landing #110824

nikic · 2024-10-15T09:17:08Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

+  Value *RHS = expand(RHSExpr);
+  if (SafeUDivMode &&
+      (!isa<SCEVConstant>(RHSExpr) || SE.isKnownNonZero(RHSExpr))) {
+    if (!isa<SCEVConstant>(S->getRHS()))


This can now use the isGuaranteedNotToBePoison check instead.

Thanks, updated. This removes the freezes in a few cases, but that should be fine as in those cases the function would always trigger UB, so expanding the UDiv before the loop simply triggers UB earlier (e.g. multi_exit_4_exit_count_with_udiv_by_value_in_latch). This should be fine I think (https://alive2.llvm.org/ce/z/anDrW9).

Added additional variants where this is not the case (multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop, multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute) and there's also multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds

nikic · 2024-10-15T09:17:29Z

llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h

@@ -419,6 +424,9 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
  BasicBlock::iterator findInsertPointAfter(Instruction *I,
                                            Instruction *MustDominate) const;

+  static const SCEV *rewriteExpressionToRemoveUB(const SCEV *BTC, Loop *L,
+                                                 ScalarEvolution &SE);


Unused prototype.

Dropped, thanks!

efriedma-quic · 2024-10-15T18:14:03Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

-  Value *RHS = expand(S->getRHS());
+  const SCEV *RHSExpr = S->getRHS();
+  Value *RHS = expand(RHSExpr);
+  if (SafeUDivMode && !SE.isKnownNonZero(RHSExpr)) {


isKnownNonZero doesn't check for poison, I think.

Hm, let me see if I can come up with a case where it is known-non-zero, but may be poison

Something like umax(x, 1) should do the trick.

Added and updated to separately check for !poison -> emit freeze, !known-non-zero -> emit umax

…roduce-ub

Extra tests for #92177, split off the PR.

…roduce-ub

nikic · 2024-10-16T10:04:39Z

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

+      RHS = Builder.CreateFreeze(RHS);
+    if (!SE.isKnownNonZero(RHSExpr))
+      RHS = Builder.CreateIntrinsic(RHS->getType(), Intrinsic::umax,
+                                    {RHS, ConstantInt::get(RHS->getType(), 1)});


This logic still doesn't look quite right to me. If you have isKnownNonZero but not isGuaranteedNotToBePoison, then you still need the umax, because you no longer know that it's non-zero after the freeze.

Updated, thanks!

…roduce-ub

nikic

LGTM, though I'd see this as just a first step of a broader change. As followups, we should:

Make SafeUDivMode the default, and disable it in LV instead.
Make isSafeToExpand only check for addrec expansion validity, not udiv by zero.

Extra tests for llvm#92177, split off the PR.

fhahn · 2024-10-17T20:54:23Z

LGTM, though I'd see this as just a first step of a broader change. As followups, we should:

Make SafeUDivMode the default, and disable it in LV instead.

Make isSafeToExpand only check for addrec expansion validity, not udiv by zero.

Sounds good to me thanks, there is definitely plenty of room for additional improvements as follow-ups!

Update SCEVExpander to introduce an SafeUDivMode, which is set when expanding operands of SCEVSequentialMinMaxExpr. In this mode, the expander will make sure that the divisor of the expanded UDiv is neither 0 nor poison. Fixes llvm#89958. PR llvm#92177

Extra tests for llvm#92177, split off the PR.

Update SCEVExpander to introduce an SafeUDivMode, which is set when expanding operands of SCEVSequentialMinMaxExpr. In this mode, the expander will make sure that the divisor of the expanded UDiv is neither 0 nor poison. Fixes llvm#89958. PR llvm#92177

fhahn requested review from nikic, preames and efriedma-quic May 14, 2024 21:04

llvmbot added vectorizers llvm:transforms labels May 14, 2024

efriedma-quic reviewed May 14, 2024

View reviewed changes

fhahn added a commit that referenced this pull request May 15, 2024

[LV] Add additional variants of tests with udiv/urem/sdiv/srem in TC.

b0a1ae2

Add additional tests with udiv/urem/sdiv/srem in trip counts, where the divisor is constant. For #92177.

fhahn force-pushed the scevexpansion-may-introduce-ub branch 2 times, most recently from a59e11d to 1862db9 Compare June 26, 2024 14:39

fhahn changed the title ~~[LV] Don't vectorize if trip count expansion may introduce UB.~~ [LV] Rewrite UDiv A, B -> UDiv A, UMax(B, 1) in trip counts if needed. Jun 26, 2024

efriedma-quic reviewed Jun 28, 2024

View reviewed changes

fhahn force-pushed the scevexpansion-may-introduce-ub branch from 1862db9 to 6ce1b5f Compare September 6, 2024 20:15

efriedma-quic reviewed Sep 17, 2024

View reviewed changes

fhahn added a commit that referenced this pull request Sep 20, 2024

[LV] Add additional tests for UDiv expansion.

413b12a

Additional tests for #89958, following discussion at #92177.

fhahn force-pushed the scevexpansion-may-introduce-ub branch from 6ce1b5f to bff4e5a Compare September 20, 2024 12:03

llvmbot added the llvm:analysis label Sep 20, 2024

efriedma-quic reviewed Sep 26, 2024

View reviewed changes

fhahn added 2 commits September 27, 2024 21:16

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

16a4b00

…roduce-ub

!fixup

32833a7

!fixup incluede missing change

b0d098c

efriedma-quic approved these changes Sep 27, 2024

View reviewed changes

fhahn added a commit that referenced this pull request Oct 2, 2024

[SCEV] Add tests for umin_seq change in #92177

bdd40e3

SCEV-only tests for #92177

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

c2dc23d

…roduce-ub

nikic reviewed Oct 2, 2024

View reviewed changes

fhahn changed the title ~~[SCEV] Retain SCEVSequentialMinMaxExpr if an operand may trigger UB.~~ [SCEVExpander] Expand UDiv avoiding UB when in seq_min/max. Oct 2, 2024

Sterling-Augustine pushed a commit to Sterling-Augustine/llvm-project that referenced this pull request Oct 3, 2024

[SCEV] Add tests for umin_seq change in llvm#92177

ec940be

SCEV-only tests for llvm#92177

xgupta pushed a commit to xgupta/llvm-project that referenced this pull request Oct 4, 2024

[SCEV] Add tests for umin_seq change in llvm#92177

834969e

SCEV-only tests for llvm#92177

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

1468d8b

…roduce-ub

nikic reviewed Oct 15, 2024

View reviewed changes

!fixup address latest comments, thanks!

c12b25c

efriedma-quic reviewed Oct 15, 2024

View reviewed changes

fhahn added 2 commits October 16, 2024 07:08

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

5f31e85

…roduce-ub

!fixup add test

9c4b76d

fhahn added a commit that referenced this pull request Oct 16, 2024

[LV] Add additional trip count expansion tests for #92177.

2442310

Extra tests for #92177, split off the PR.

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

b699ddd

…roduce-ub

nikic reviewed Oct 16, 2024

View reviewed changes

fhahn added 2 commits October 16, 2024 11:22

Merge remote-tracking branch 'origin/main' into scevexpansion-may-int…

5469289

…roduce-ub

!fixup also generate umax if the value may be poison.

d5c8931

nikic approved these changes Oct 16, 2024

View reviewed changes

bricknerb pushed a commit to bricknerb/llvm-project that referenced this pull request Oct 16, 2024

[LV] Add additional trip count expansion tests for llvm#92177.

ca2d72b

Extra tests for llvm#92177, split off the PR.

bricknerb pushed a commit to bricknerb/llvm-project that referenced this pull request Oct 17, 2024

[LV] Add additional trip count expansion tests for llvm#92177.

77ef375

Extra tests for llvm#92177, split off the PR.

fhahn merged commit b060661 into llvm:main Oct 17, 2024
8 checks passed

fhahn deleted the scevexpansion-may-introduce-ub branch October 17, 2024 20:55

EricWF pushed a commit to efcs/llvm-project that referenced this pull request Oct 22, 2024

[LV] Add additional trip count expansion tests for llvm#92177.

72010fe

Extra tests for llvm#92177, split off the PR.

[SCEVExpander] Expand UDiv avoiding UB when in seq_min/max. #92177

[SCEVExpander] Expand UDiv avoiding UB when in seq_min/max. #92177

Conversation

fhahn commented May 14, 2024 • edited Loading

llvmbot commented May 14, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

fhahn commented Jul 1, 2024

efriedma-quic commented Sep 5, 2024

fhahn commented Sep 6, 2024

fhahn commented Sep 13, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

github-actions bot commented Sep 27, 2024 • edited Loading

fhahn commented Sep 27, 2024

efriedma-quic left a comment

Choose a reason for hiding this comment

fhahn commented Oct 2, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

nikic commented Oct 2, 2024

fhahn commented Oct 2, 2024

fhahn commented Oct 14, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

nikic left a comment

Choose a reason for hiding this comment

fhahn commented Oct 17, 2024

fhahn commented May 14, 2024 •

edited

Loading

llvmbot commented May 14, 2024 •

edited

Loading

github-actions bot commented Sep 27, 2024 •

edited

Loading