-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Canonicalize the fcmp range check idiom into fabs + fcmp
#76367
Conversation
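@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) Changes: This patch canonicalizes the fcmp range check idiom into `fabs + fcmp`, since the canonicalized form is better than the original form for the backends. Godbolt: https://godbolt.org/z/x3eqPb1fz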
Alive2: https://alive2.llvm.org/ce/z/MRtoYq Full diff: https://github.com/llvm/llvm-project/pull/76367.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5e362f4117d051..689ee6eaabb4b9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1457,6 +1457,32 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
}
}
+ // Canonicalize the range check idiom:
+ // and (fcmp olt/ole/ult/ule x, C), (fcmp ogt/oge/ugt/uge x, -C)
+ // --> fabs(x) olt/ole/ult/ule C
+ // or (fcmp ogt/oge/ugt/uge x, C), (fcmp olt/ole/ult/ule x, -C)
+ // --> fabs(x) ogt/oge/ugt/uge C
+ // TODO: Generalize to handle a negated variable operand?
+ const APFloat *LHSC, *RHSC;
+ if (LHS->hasOneUse() && RHS->hasOneUse() && LHS0 == RHS0 &&
+ FCmpInst::getSwappedPredicate(PredL) == PredR &&
+ match(LHS1, m_APFloatAllowUndef(LHSC)) &&
+ match(RHS1, m_APFloatAllowUndef(RHSC)) &&
+ LHSC->bitwiseIsEqual(neg(*RHSC))) {
+ auto IsLessThanOrLessEqual = [](FCmpInst::Predicate Pred) {
+ return (getFCmpCode(Pred) & 0b0110) == 0b0100;
+ };
+ if (IsLessThanOrLessEqual(IsAnd ? PredR : PredL)) {
+ std::swap(LHSC, RHSC);
+ std::swap(PredL, PredR);
+ }
+ if (IsLessThanOrLessEqual(IsAnd ? PredL : PredR)) {
+ Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, LHS0);
+ return Builder.CreateFCmp(PredL, FAbs,
+ ConstantFP::get(LHS0->getType(), *LHSC));
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll
new file mode 100644
index 00000000000000..06e5ca05ff1501
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
+
+declare void @use(i1)
+
+define i1 @test_and_olt(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_ole(float %x) {
+; CHECK-LABEL: define i1 @test_and_ole(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ole float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ole float %x, 0x3C00000000000000
+ %cmp2 = fcmp oge float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_or_ogt(float %x) {
+; CHECK-LABEL: define i1 @test_or_ogt(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ogt float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ogt float %x, 0x3C00000000000000
+ %cmp2 = fcmp olt float %x, 0xBC00000000000000
+ %cond = or i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_or_oge(float %x) {
+; CHECK-LABEL: define i1 @test_or_oge(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp oge float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp oge float %x, 0x3C00000000000000
+ %cmp2 = fcmp ole float %x, 0xBC00000000000000
+ %cond = or i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_ult(float %x) {
+; CHECK-LABEL: define i1 @test_and_ult(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ult float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ult float %x, 0x3C00000000000000
+ %cmp2 = fcmp ugt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_ule(float %x) {
+; CHECK-LABEL: define i1 @test_and_ule(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ule float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ule float %x, 0x3C00000000000000
+ %cmp2 = fcmp uge float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_or_ugt(float %x) {
+; CHECK-LABEL: define i1 @test_or_ugt(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ugt float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ugt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ult float %x, 0xBC00000000000000
+ %cond = or i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_or_uge(float %x) {
+; CHECK-LABEL: define i1 @test_or_uge(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp uge float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp uge float %x, 0x3C00000000000000
+ %cmp2 = fcmp ule float %x, 0xBC00000000000000
+ %cond = or i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_commuted(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_commuted(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp2, %cmp1
+ ret i1 %cond
+}
+define i1 @test_and_olt_subnormal(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_subnormal(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[TMP1]], 0x36A0000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x36A0000000000000
+ %cmp2 = fcmp ogt float %x, 0xB6A0000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_infinity(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_infinity(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp one float [[TMP1]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x7FF0000000000000
+ %cmp2 = fcmp ogt float %x, 0xFFF0000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_zero(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_zero(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = fcmp olt float %x, 0x0000000000000000
+ %cmp2 = fcmp ogt float %x, 0x8000000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_ole_zero(float %x) {
+; CHECK-LABEL: define i1 @test_and_ole_zero(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[COND:%.*]] = fcmp oeq float [[X]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp ole float %x, 0x0000000000000000
+ %cmp2 = fcmp oge float %x, 0x8000000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_logical(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_logical(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[TMP1]], 0x3C00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = select i1 %cmp1, i1 %cmp2, i1 false
+ ret i1 %cond
+}
+define <2 x i1> @test_and_olt_undef(<2 x float> %x) {
+; CHECK-LABEL: define <2 x i1> @test_and_olt_undef(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], <float 0x3C00000000000000, float 0x3C00000000000000>
+; CHECK-NEXT: ret <2 x i1> [[COND]]
+;
+ %cmp1 = fcmp olt <2 x float> %x, <float 0x3C00000000000000, float undef>
+ %cmp2 = fcmp ogt <2 x float> %x, <float 0xBC00000000000000, float undef>
+ %cond = and <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %cond
+}
+define i1 @test_and_olt_nan(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_nan(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = fcmp olt float %x, 0x7FF8000000000000
+ %cmp2 = fcmp ogt float %x, 0xFFF8000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_ogt(float %x) {
+; CHECK-LABEL: define i1 @test_and_ogt(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = fcmp ogt float %x, 0x3C00000000000000
+ %cmp2 = fcmp olt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_or_olt(float %x) {
+; CHECK-LABEL: define i1 @test_or_olt(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp ogt float [[TMP1]], 0xBC00000000000000
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = or i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+; Negative tests
+define i1 @test_and_olt_multiuse(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_multiuse(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0x3C00000000000000
+; CHECK-NEXT: call void @use(i1 [[CMP1]])
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X]], 0xBC00000000000000
+; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ call void @use(i1 %cmp1)
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_mismatched_lhs(float %x, float %y) {
+; CHECK-LABEL: define i1 @test_and_olt_mismatched_lhs(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0x3C00000000000000
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[Y]], 0xBC00000000000000
+; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %y, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_same_sign(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_same_sign(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp ogt float %x, 0x3C00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_mismatched_mag(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_mismatched_mag(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0x3C80000000000000
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X]], 0xBC00000000000000
+; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C80000000000000
+ %cmp2 = fcmp ogt float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
+define i1 @test_and_olt_wrong_pred2(float %x) {
+; CHECK-LABEL: define i1 @test_and_olt_wrong_pred2(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0x3C00000000000000
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp oge float [[X]], 0xBC00000000000000
+; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %cmp1 = fcmp olt float %x, 0x3C00000000000000
+ %cmp2 = fcmp oge float %x, 0xBC00000000000000
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
|
May I assume that you do the same combine for integer range checks, i.e. and of icmps? Could you point me please to the combine? |
I don't think they are the same combine. We always convert and/or of icmps into a different form [inline code lost in extraction]; see llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp, lines 1260 to 1329 in 76243ad.
|
Thanks! |
Do you need to pass FMF-flags from the original floating point operations to the new ones? |
d5850ff
to
5a41406
Compare
Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mostly lgtm with some nits
|
||
Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, LHS0); | ||
return Builder.CreateFCmp(PredL, FAbs, | ||
ConstantFP::get(LHS0->getType(), *LHSC)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
seems like you could just pass the original value through, but I guess that doesn't work if the swap happened above
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PredL/R and LHSC/RHSC are also swapped.
5a41406
to
df2bfe7
Compare
Ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
still lgtm with some nits
[InstCombine] Canonicalize the fcmp range check idiom into fabs + fcmp. Add more NaN tests.
df2bfe7
to
11881d5
Compare
The old definitions of bool_1, bool_2, bool_3 in simd_op_check_x86 (etc.) all referred to the same entry in in_f32; as of llvm/llvm-project#76367, the LLVM optimizer is smart enough to realize that (e.g.) bool1 != bool2 by construction, and optimizes away the code that tests their conditions, such as the one for andps and orps. Initializing them from different locations is enough to outsmart the compiler. (The bug was only noticed in the x86 test, but I updated the other tests to guard against future improvements there too.)
The old definitions of bool_1, bool_2, bool_3 in simd_op_check_x86 (etc.) all referred to the same entry in in_f32; as of llvm/llvm-project#76367, the LLVM optimizer is smart enough to realize that (e.g.) bool1 != bool2 by construction, and optimizes away the code that tests their conditions, such as the one for andps and orps. Initializing them from different locations is enough to outsmart the compiler. (The bug was only noticed in the x86 test, but I updated the other tests to guard against future improvements there too.)
The old definitions of bool_1, bool_2, bool_3 in simd_op_check_x86 (etc.) all referred to the same entry in in_f32; as of llvm/llvm-project#76367, the LLVM optimizer is smart enough to realize that (e.g.) bool1 != bool2 by construction, and optimizes away the code that tests their conditions, such as the one for andps and orps. Initializing them from different locations is enough to outsmart the compiler. (The bug was only noticed in the x86 test, but I updated the other tests to guard against future improvements there too.)
This patch canonicalizes the fcmp range check idiom into `fabs + fcmp`, since the canonicalized form is better than the original form for the backends.
Godbolt: https://godbolt.org/z/x3eqPb1fz
Alive2: https://alive2.llvm.org/ce/z/MRtoYq