Skip to content

Commit

Permalink
[SLP]Check that operand of abs does not overflow before making it par…
Browse files Browse the repository at this point in the history
…t of minbitwidth transformation

Need to check that the operand of the abs intrinsic can be safely
truncated before making it part of the minbitwidth transformation.

Fixes #112577

(cherry picked from commit 709abac)
  • Loading branch information
alexey-bataev authored and llvmbot committed Oct 21, 2024
1 parent e2f6ef4 commit c4bfd01
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
16 changes: 16 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote(
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
});
};
auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
unsigned SignBits = OrigBitWidth - BitWidth;
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
unsigned Op0SignBits =
ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT);
return SignBits <= Op0SignBits &&
((SignBits != Op0SignBits &&
!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)));
});
};
if (ID != Intrinsic::abs) {
Operands.push_back(getOperandEntry(&E, 1));
CallChecker = CompChecker;
} else {
CallChecker = AbsChecker;
}
InstructionCost BestCost =
std::numeric_limits<InstructionCost::CostType>::max();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ define i32 @test(i32 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369>
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true)
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]
Expand Down

0 comments on commit c4bfd01

Please sign in to comment.