-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyLibCalls] fdim constant fold #109235
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: None (braw-lee) Changes2nd PR to fix #108695 based on #108702 Full diff: https://github.com/llvm/llvm-project/pull/109235.diff 9 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 5914324b286c05..a18952f038cc31 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -2067,6 +2067,21 @@ TLI_DEFINE_ENUM_INTERNAL(remquol)
TLI_DEFINE_STRING_INTERNAL("remquol")
TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl, Ptr)
+/// double fdim(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fdim)
+TLI_DEFINE_STRING_INTERNAL("fdim")
+TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, Dbl)
+
+/// float fdimf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fdimf)
+TLI_DEFINE_STRING_INTERNAL("fdimf")
+TLI_DEFINE_SIG_INTERNAL(Flt, Flt, Flt)
+
+/// long double fdiml(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fdiml)
+TLI_DEFINE_STRING_INTERNAL("fdiml")
+TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl)
+
/// int remove(const char *path);
TLI_DEFINE_ENUM_INTERNAL(remove)
TLI_DEFINE_STRING_INTERNAL("remove")
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2e7a0ec29ed999..467a8f25386b71 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -211,6 +211,7 @@ class LibCallSimplifier {
Value *optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B);
Value *optimizeSymmetric(CallInst *CI, LibFunc Func, IRBuilderBase &B);
Value *optimizeRemquo(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeFdim(CallInst *CI, IRBuilderBase &B);
// Wrapper for all floating point library call optimizations
Value *optimizeFloatingPointLibCall(CallInst *CI, LibFunc Func,
IRBuilderBase &B);
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 47413239f3c6cc..1785d77bca985c 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -306,6 +306,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_powf);
TLI.setUnavailable(LibFunc_remainderf);
TLI.setUnavailable(LibFunc_remquof);
+ TLI.setUnavailable(LibFunc_fdimf);
TLI.setUnavailable(LibFunc_sinf);
TLI.setUnavailable(LibFunc_sinhf);
TLI.setUnavailable(LibFunc_sqrtf);
@@ -337,6 +338,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_powl);
TLI.setUnavailable(LibFunc_remainderl);
TLI.setUnavailable(LibFunc_remquol);
+ TLI.setUnavailable(LibFunc_fdiml);
TLI.setUnavailable(LibFunc_sinl);
TLI.setUnavailable(LibFunc_sinhl);
TLI.setUnavailable(LibFunc_sqrtl);
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index b0da19813f0a4b..abe0d8c3d2e999 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1191,6 +1191,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
+ case LibFunc_fdim:
+ case LibFunc_fdiml:
+ case LibFunc_fdimf:
case LibFunc_ffs:
case LibFunc_ffsl:
case LibFunc_ffsll:
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 917f81863cf673..5f3bb5fa060729 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -3080,6 +3080,41 @@ Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
return ConstantFP::get(CI->getType(), Rem);
}
+/// Constant folds fdim
+Value *LibCallSimplifier::optimizeFdim(CallInst *CI, IRBuilderBase &B) {
+ const APFloat *X, *Y;
+ // Check if both values are constants
+ if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
+ !match(CI->getArgOperand(1), m_APFloat(Y)))
+ return nullptr;
+ // If either argument is NaN, NaN is returned
+ if (X->isNaN() || Y->isNaN())
+ return ConstantFP::getQNaN(CI->getType());
+
+ // If the difference if negative, return +0.0
+ if (*X <= *Y)
+ return ConstantFP::get(CI->getType(), 0.0);
+
+ APFloat ReturnVal = *X;
+ APFloat::opStatus Status =
+ ReturnVal.subtract(*Y, RoundingMode::NearestTiesToEven);
+ switch (Status) {
+ case APFloat::opStatus::opOK:
+ break;
+ case APFloat::opStatus::opOverflow:
+ return ConstantFP::get(
+ CI->getType(),
+ APFloat::getLargest(X->getSemantics(), /*Negative=*/false));
+ case APFloat::opStatus::opUnderflow:
+ return ConstantFP::get(
+ CI->getType(),
+ APFloat::getLargest(X->getSemantics(), /*Negative=*/true));
+ default:
+ return nullptr;
+ }
+ return ConstantFP::get(CI->getType(), ReturnVal);
+}
+
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -4009,6 +4044,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
return optimizeBinaryDoubleFP(CI, Builder, TLI);
return nullptr;
+ case LibFunc_fdim:
+ case LibFunc_fdimf:
+ case LibFunc_fdiml:
+ return optimizeFdim(CI, Builder);
case LibFunc_fminf:
case LibFunc_fmin:
case LibFunc_fminl:
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index bc0d7a509e1f5d..267a288345dc10 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -830,6 +830,16 @@ declare float @remquof(float, float, ptr)
; CHECK: declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr nocapture) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr)
+
+; CHECK: declare double @fdim(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare double @fdim(double, double)
+
+; CHECK: declare float @fdimf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare float @fdimf(float, float)
+
+; CHECK: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare x86_fp80 @fdiml(x86_fp80, x86_fp80)
+
; CHECK: declare noundef i32 @rename(ptr nocapture noundef readonly, ptr nocapture noundef readonly) [[NOFREE_NOUNWIND]]
declare i32 @rename(ptr, ptr)
diff --git a/llvm/test/Transforms/InstCombine/fdim.ll b/llvm/test/Transforms/InstCombine/fdim.ll
new file mode 100644
index 00000000000000..6213262f1af1e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdim.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define double @fdim_double() {
+; CHECK-LABEL: define double @fdim_double() {
+; CHECK-NEXT: ret double 2.500000e+00
+;
+ %dim = call double @fdim (double 10.5, double 8.0)
+ ret double %dim
+}
+
+define double @fdim_double1() {
+; CHECK-LABEL: define double @fdim_double1() {
+; CHECK-NEXT: ret double 0.000000e+00
+;
+ %dim = call double @fdim (double 7.0, double 8.0)
+ ret double %dim
+}
+
+define float @fdim_float() {
+; CHECK-LABEL: define float @fdim_float() {
+; CHECK-NEXT: ret float 2.500000e+00
+;
+ %dim = call float @fdimf (float 10.5, float 8.0)
+ ret float %dim
+}
+
+define float @fdim_float1() {
+; CHECK-LABEL: define float @fdim_float1() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %dim = call float @fdimf (float 7.0, float 8.0)
+ ret float %dim
+}
+
+declare double @fdim(double, double)
+declare float @fdimf(float, float)
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 47aeb0ad8fdef9..767a962402b783 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,7 +34,7 @@
#
# CHECK: << Total TLI yes SDK no: 18
# CHECK: >> Total TLI no SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 250
+# CHECK: == Total TLI yes SDK yes: 253
#
# WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
# WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
@@ -48,14 +48,14 @@
# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
# WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}}
# WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 249
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 252
#
## The -COUNT suffix doesn't care if there are too many matches, so check
## the exact count first; the two directives should add up to that.
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
-# AVAIL: TLI knows 501 symbols, 268 available
-# AVAIL-COUNT-268: {{^}} available
+# AVAIL: TLI knows 504 symbols, 271 available
+# AVAIL-COUNT-271: {{^}} available
# AVAIL-NOT: {{^}} available
# UNAVAIL-COUNT-233: not available
# UNAVAIL-NOT: not available
@@ -814,6 +814,18 @@ DynamicSymbols:
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
+ - Name: fdim
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: fdimf
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: fdiml
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
- Name: rewind
Type: STT_FUNC
Section: .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index c081c44ed35d00..ed6a91ce611acf 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -317,6 +317,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
"declare double @remquo(double, double, ptr)\n"
"declare float @remquof(float, float, ptr)\n"
"declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr)\n"
+ "declare double @fdim(double, double)\n"
+ "declare float @fdimf(float, float)\n"
+ "declare x86_fp80 @fdiml(x86_fp80, x86_fp80)\n"
"declare i32 @rename(i8*, i8*)\n"
"declare void @rewind(%struct*)\n"
"declare double @rint(double)\n"
|
case APFloat::opStatus::opOverflow: | ||
return ConstantFP::get( | ||
CI->getType(), | ||
APFloat::getLargest(X->getSemantics(), /*Negative=*/false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The result should be inf
. Please add a tests for fdim(1e308, -1e308)
.
BTW, it would be better to use fmax(x - y, 0)
instead of handling all corner cases yourself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, I read that on cppref but wasn't sure if it will handle all corner cases,
will change to that
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The special cases also set errno, in which case we cannot perform the fold unless the call is memory(none)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i changed the fold so that it converts fdim(x, y) to fmax(x-y, 0)
handling corner cases should not be needed now
case APFloat::opStatus::opUnderflow: | ||
return ConstantFP::get( | ||
CI->getType(), | ||
APFloat::getLargest(X->getSemantics(), /*Negative=*/true)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is incorrect. Underflow means the result is subnormal.
case APFloat::opStatus::opOverflow: | ||
return ConstantFP::get( | ||
CI->getType(), | ||
APFloat::getLargest(X->getSemantics(), /*Negative=*/false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The special cases also set errno, in which case we cannot perform the fold unless the call is memory(none)
%dim = call float @fdimf (float 7.0, float 8.0) | ||
ret float %dim | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs tests for all the edge cases. Can you also test poison/undef arguments in each position
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added test,
let me know if there are still some edge cases to be covered
; CHECK-LABEL: define double @fdim_double1() { | ||
; CHECK-NEXT: ret double 0.000000e+00 | ||
; | ||
%dim = call double @fdim (double 7.0, double 8.0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extra space before ( in all functions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed
53989aa
to
04799f4
Compare
ret double %dim | ||
} | ||
|
||
define float @fdim_float() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm
this test should fail because windows should not be able to fold @fdimf, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the call should remain
✅ With the latest revision this PR passed the C/C++ code formatter. |
if (X->isNaN() || Y->isNaN()) | ||
return ConstantFP::getQNaN(CI->getType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should prefer to preserve the payload, so return X if X is nan or Y if Y is nan
// set no-NaN fast-math-flag as we already checked for NaN for both operands | ||
FMF.setNoNaNs(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is incorrect, you only know it's not a literal nan. It could still be a nan value. If you need this, you want isKnownNeverNaN
// set no-signed-zeroes as fdim will never return -0.0 | ||
FMF.setNoSignedZeros(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No signed zeroes does not mean the result will never be -0, you should not just introduce this flag
FMF.setNoNaNs(); | ||
// set no-signed-zeroes as fdim will never return -0.0 | ||
FMF.setNoSignedZeros(); | ||
B.setFastMathFlags(FMF); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can only preserve the fast math flags that were present on the original call
Intrinsic::ID IID = Intrinsic::maxnum; | ||
return copyFlags( | ||
*CI, B.CreateBinaryIntrinsic( | ||
IID, B.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe you can also copy the flags onto the fsub
ret double %dim | ||
} | ||
|
||
define float @fdim_float() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the call should remain
; CHECK-NEXT: [[DIM:%.*]] = call double @fdim(double poison, double 1.000000e+00) | ||
; CHECK-NEXT: ret double [[DIM]] | ||
; | ||
%dim = call double @fdim(double poison, double 1.0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can fold to poison
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i tried using maximum
intrinsic, it folds
fdim(poison, 1) -> zero
fdim(undef, 1) -> 0x7FF8000000000000
fdim(poison, poison) -> zero
fdim(undef, undef) -> zero
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fdim and maximum should propagate poison, something is missing somewhere
; CHECK-NEXT: [[DIM:%.*]] = call double @fdim(double 1.000000e+00, double poison) | ||
; CHECK-NEXT: ret double [[DIM]] | ||
; | ||
%dim = call double @fdim(double 1.0, double poison) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can fold to poison
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
m_APFloat() would not match against undef/poison values
so i used m_Undef() to find any undef/poison values, and returned them
// set no-signed-zeroes as fdim will never return -0.0 | ||
FMF.setNoSignedZeros(); | ||
B.setFastMathFlags(FMF); | ||
// fdim is equivalent to fmax(x - y, 0), except for the NaN handling requirements. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I read this correctly, this wants to propagate nans instead of the return non-nan operand behavior. Can this just unconditionally use Intrinsic::maximum instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this was explicitly mentioned in cppref notes https://en.cppreference.com/w/cpp/numeric/math/fdim
so i assumed maximum intrinsic does not handles this
documentation for maxnum says the same
https://llvm.org/docs/LangRef.html#llvm-maxnum-intrinsic
"If either operand is a NaN, returns the other non-NaN operand. Returns NaN only if both operands are NaN. If the operands compare equal, returns either one of the operands. For example, this means that fmax(+0.0, -0.0) returns either -0.0 or 0.0."
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maximum, not maxnum. https://llvm.org/docs/LangRef.html#llvm-maximum-intrinsic, which is also fmaximum in C23
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maximum also has stricter signed 0 behavior than maxnum
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, i misread maxnum
yeah, maximum intrinsic actually fits the box here, thanks for the pointer
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would suggest trying to use maxnum if the call is nnan, and if not just use maximum
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually maybe just use maximum, the nnan case can transform later if needed. I'm more wondering if it is OK to add nsz to specify the relaxed zero requirement
// Check if both values are constants | ||
if (!match(CI->getArgOperand(0), m_APFloat(X)) || | ||
!match(CI->getArgOperand(1), m_APFloat(Y))) | ||
return nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't do anything with the constant values, besides check if they are nan (which isn't strong enough to do this transform)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not sure what you mean by that,
we need the NaN check and i don't see if we can do that through a Value*
, we need to transform it to constant
can you suggest any alternatives here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need to know a dynamic value cannot be nan, which isKnownNeverNaN can do
04799f4
to
8f502a7
Compare
IRBuilderBase::FastMathFlagGuard Guard(B); | ||
FastMathFlags FMF = CI->getFastMathFlags(); | ||
// set no-NaN fast-math-flag as we already checked for NaN for both operands | ||
FMF.setNoNaNs(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Even if the operands are not NaNs, the operation still can produce it, see fdim(+inf, +inf)
.
// set no-signed-zeroes as fdim will never return -0.0 | ||
FMF.setNoSignedZeros(); | ||
B.setFastMathFlags(FMF); | ||
// fdim is equivalent to fmax(x - y, 0), except for the NaN handling |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can only do this if you know that math errno isn't being used. fdim sets errno on range errors, but the fmax intrinsic does not. An example that shows this is fdim(1e308, -1e308);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
correct, I am thinking of abandoning fdim -> fmax, as this PR is for constant propogation
and i thougth fdim->fmax would mean i won't have to handle corner cases manually
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is better to just transform into the more understood ops
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree it would be better to transform to fsub+fmax, if you can be sure errno doesn't need to be set. I suppose the "memory(none)" attribute establishes that, as you noted in another commented. A case could be made that the 'afn' fast-math flag would also allow it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, so i added checks so the fold is done only when fsub
return ok and fdim has memory(none)
attribute
poison and undefs are not getting propogated, will have to look into that
8f502a7
to
a1cc021
Compare
if (X->isNaN()) | ||
return ConstantFP::get(CI->getType(), *X); | ||
if (Y->isNaN()) | ||
return ConstantFP::get(CI->getType(), *Y); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should makeQuiet the nan, in case it is signaling
APFloat::opStatus Status = | ||
Difference.subtract(*Y, RoundingMode::NearestTiesToEven); | ||
switch (Status) { | ||
case APFloat::opStatus::opOK: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's fine if rounding happened, it would happen anyway (I guess you could check this to handle the strictfp case)
|
||
const APFloat *X, *Y; | ||
// Check if both values are constants | ||
if (!match(CI->getArgOperand(0), m_APFloat(X)) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need to check the constants if you are doing the general expansion
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but this is just a constant fold right?
also APFloat*
is used later to return NaN values, check status of fsub
operation
those wouldn't be possible with Value*
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw, sry, this PR is taking too long, I am new to LLVM and the codebase is too big ;)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No problem!
You didn't directly implement this as constant folding. It's almost a general case replacement of the call with a short sequence. A pure constant fold would be done entirely with APFloat, rather than attempting to create intermediate instructions.
It's probably a good idea to leave the general case for a follow up PR, since there are cost considerations (more targets have maxnum-like operations), and last I checked maximum was poorly supported by most backends.
|
||
declare double @fdim(double, double) #0 | ||
declare float @fdimf(float, float) #0 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add some strictfp tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lib calls don't fold for strictfp cases, added test
699a81b
to
5d907ac
Compare
// propogate poison/undef if any | ||
if (match(CI->getArgOperand(0), m_Undef())) | ||
return CI->getArgOperand(0); | ||
if (match(CI->getArgOperand(1), m_Undef())) | ||
return CI->getArgOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't quite right. You can propagate poison -> poison, but undef needs more constraints (usually undef gets folded to qnan)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo propogate
if (match(CI->getArgOperand(0), m_Undef())) | ||
return CI->getArgOperand(0); | ||
if (match(CI->getArgOperand(1), m_Undef())) | ||
return CI->getArgOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (match(CI->getArgOperand(0), m_Undef())) | |
return CI->getArgOperand(0); | |
if (match(CI->getArgOperand(1), m_Undef())) | |
return CI->getArgOperand(1); | |
if (isa<PoisonValue>(CI->getArgOperand(0)) | |
return CI->getArgOperand(0); | |
if (isa<PoisonValue>(CI->getArgOperand(1)) | |
return CI->getArgOperand(1); |
This isn't quite right. You can propagate poison -> poison, but undef needs more constraints (usually undef gets folded to qnan). For simplicity you can just handle poison
|
||
const APFloat *X, *Y; | ||
// Check if both values are constants | ||
if (!match(CI->getArgOperand(0), m_APFloat(X)) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No problem!
You didn't directly implement this as constant folding. It's almost a general case replacement of the call with a short sequence. A pure constant fold would be done entirely with APFloat, rather than attempting to create intermediate instructions.
It's probably a good idea to leave the general case for a follow up PR, since there are cost considerations (more targets have maxnum-like operations), and last I checked maximum was poorly supported by most backends.
auto *FSubCall = | ||
copyFlags(*CI, B.CreateFSub(CI->getOperand(0), CI->getOperand(1))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You did the subtract above, but then throw it away to create fsub which will constant fold.
If you want to only handle this as constant folding, you should use the APFloat Difference, and then use APFloat version of maximum.
Something like minimum(Difference, APFloat::getZero())
5d907ac
to
a67ba80
Compare
declare float @fdimf(float, float) #0 | ||
declare fp128 @fdiml(fp128, fp128) #0 | ||
|
||
attributes #0 = { memory(none)} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
attributes #0 = { memory(none)} | |
attributes #0 = { memory(none) } |
return nullptr; | ||
|
||
// TODO : Handle undef values | ||
// propagate poison if any |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// propagate poison if any | |
// Propagate poison if any |
APFloat Difference = *X; | ||
APFloat::opStatus Status = | ||
Difference.subtract(*Y, RoundingMode::NearestTiesToEven); | ||
switch (Status) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need to check the status
if (Y->isNaN()) | ||
return ConstantFP::get(CI->getType(), Y->makeQuiet()); | ||
|
||
// if X - Y overflows, it will set the errno, so we avoid the fold |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You excluded writing errno above with the doesNotAccessMemory check. Either don't need the status check, or can try to handle the writing errno known-no-overflow case
// If either argument is NaN, NaN is returned | ||
if (X->isNaN()) | ||
return ConstantFP::get(CI->getType(), X->makeQuiet()); | ||
if (Y->isNaN()) | ||
return ConstantFP::get(CI->getType(), Y->makeQuiet()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could actually just pass this through to maximum and I think you'll get the same result
|
||
declare double @fdim(double, double) #0 | ||
declare float @fdimf(float, float) #0 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the nan tests are still missing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added nan tests
0x7FF8000000000000
is the NAN value right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a (quiet) nan value. If you want to be very comprehensive you can check payloads and snan quieting
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added tests for snan
let me know if anything else is needed
ba372c9
to
deb5ec9
Compare
Signed-off-by: Kushal Pal <kushalpal109@gmail.com>
Signed-off-by: Kushal Pal <kushalpal109@gmail.com>
deb5ec9
to
1987c69
Compare
2nd PR to fix llvm#108695 based on llvm#108702 --------- Signed-off-by: Kushal Pal <kushalpal109@gmail.com>
2nd PR to fix #108695
based on #108702