[SimplifyLibCalls] fdim constant fold #109235

braw-lee · 2024-09-19T05:20:16Z

2nd PR to fix #108695

based on #108702

llvmbot · 2024-09-19T05:20:51Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: None (braw-lee)

Changes

2nd PR to fix #108695

based on #108702

Full diff: https://github.com/llvm/llvm-project/pull/109235.diff

9 Files Affected:

(modified) llvm/include/llvm/Analysis/TargetLibraryInfo.def (+15)
(modified) llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h (+1)
(modified) llvm/lib/Analysis/TargetLibraryInfo.cpp (+2)
(modified) llvm/lib/Transforms/Utils/BuildLibCalls.cpp (+3)
(modified) llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (+39)
(modified) llvm/test/Transforms/InferFunctionAttrs/annotate.ll (+10)
(added) llvm/test/Transforms/InstCombine/fdim.ll (+37)
(modified) llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml (+16-4)
(modified) llvm/unittests/Analysis/TargetLibraryInfoTest.cpp (+3)

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 5914324b286c05..a18952f038cc31 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -2067,6 +2067,21 @@ TLI_DEFINE_ENUM_INTERNAL(remquol)
 TLI_DEFINE_STRING_INTERNAL("remquol")
 TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl, Ptr)
 
+/// double fdim(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fdim)
+TLI_DEFINE_STRING_INTERNAL("fdim")
+TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, Dbl)
+
+/// float fdimf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fdimf)
+TLI_DEFINE_STRING_INTERNAL("fdimf")
+TLI_DEFINE_SIG_INTERNAL(Flt, Flt, Flt)
+
+/// long double fdiml(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fdiml)
+TLI_DEFINE_STRING_INTERNAL("fdiml")
+TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl)
+
 /// int remove(const char *path);
 TLI_DEFINE_ENUM_INTERNAL(remove)
 TLI_DEFINE_STRING_INTERNAL("remove")
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2e7a0ec29ed999..467a8f25386b71 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -211,6 +211,7 @@ class LibCallSimplifier {
   Value *optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B);
   Value *optimizeSymmetric(CallInst *CI, LibFunc Func, IRBuilderBase &B);
   Value *optimizeRemquo(CallInst *CI, IRBuilderBase &B);
+  Value *optimizeFdim(CallInst *CI, IRBuilderBase &B);
   // Wrapper for all floating point library call optimizations
   Value *optimizeFloatingPointLibCall(CallInst *CI, LibFunc Func,
                                       IRBuilderBase &B);
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 47413239f3c6cc..1785d77bca985c 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -306,6 +306,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
       TLI.setUnavailable(LibFunc_powf);
       TLI.setUnavailable(LibFunc_remainderf);
       TLI.setUnavailable(LibFunc_remquof);
+      TLI.setUnavailable(LibFunc_fdimf);
       TLI.setUnavailable(LibFunc_sinf);
       TLI.setUnavailable(LibFunc_sinhf);
       TLI.setUnavailable(LibFunc_sqrtf);
@@ -337,6 +338,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_powl);
     TLI.setUnavailable(LibFunc_remainderl);
     TLI.setUnavailable(LibFunc_remquol);
+    TLI.setUnavailable(LibFunc_fdiml);
     TLI.setUnavailable(LibFunc_sinl);
     TLI.setUnavailable(LibFunc_sinhl);
     TLI.setUnavailable(LibFunc_sqrtl);
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index b0da19813f0a4b..abe0d8c3d2e999 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1191,6 +1191,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
   case LibFunc_fabs:
   case LibFunc_fabsf:
   case LibFunc_fabsl:
+  case LibFunc_fdim:
+  case LibFunc_fdiml:
+  case LibFunc_fdimf:
   case LibFunc_ffs:
   case LibFunc_ffsl:
   case LibFunc_ffsll:
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 917f81863cf673..5f3bb5fa060729 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -3080,6 +3080,41 @@ Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
   return ConstantFP::get(CI->getType(), Rem);
 }
 
+/// Constant folds fdim
+Value *LibCallSimplifier::optimizeFdim(CallInst *CI, IRBuilderBase &B) {
+  const APFloat *X, *Y;
+  // Check if both values are constants
+  if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
+      !match(CI->getArgOperand(1), m_APFloat(Y)))
+    return nullptr;
+  // If either argument is NaN, NaN is returned
+  if (X->isNaN() || Y->isNaN())
+    return ConstantFP::getQNaN(CI->getType());
+
+  // If the difference if negative, return +0.0
+  if (*X <= *Y)
+    return ConstantFP::get(CI->getType(), 0.0);
+
+  APFloat ReturnVal = *X;
+  APFloat::opStatus Status =
+      ReturnVal.subtract(*Y, RoundingMode::NearestTiesToEven);
+  switch (Status) {
+  case APFloat::opStatus::opOK:
+    break;
+  case APFloat::opStatus::opOverflow:
+    return ConstantFP::get(
+        CI->getType(),
+        APFloat::getLargest(X->getSemantics(), /*Negative=*/false));
+  case APFloat::opStatus::opUnderflow:
+    return ConstantFP::get(
+        CI->getType(),
+        APFloat::getLargest(X->getSemantics(), /*Negative=*/true));
+  default:
+    return nullptr;
+  }
+  return ConstantFP::get(CI->getType(), ReturnVal);
+}
+
 //===----------------------------------------------------------------------===//
 // Integer Library Call Optimizations
 //===----------------------------------------------------------------------===//
@@ -4009,6 +4044,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
     if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
       return optimizeBinaryDoubleFP(CI, Builder, TLI);
     return nullptr;
+  case LibFunc_fdim:
+  case LibFunc_fdimf:
+  case LibFunc_fdiml:
+    return optimizeFdim(CI, Builder);
   case LibFunc_fminf:
   case LibFunc_fmin:
   case LibFunc_fminl:
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index bc0d7a509e1f5d..267a288345dc10 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -830,6 +830,16 @@ declare float @remquof(float, float, ptr)
 ; CHECK: declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr nocapture) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
 declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr)
 
+
+; CHECK: declare double @fdim(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare double @fdim(double, double)
+
+; CHECK: declare float @fdimf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare float @fdimf(float, float)
+
+; CHECK: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare x86_fp80 @fdiml(x86_fp80, x86_fp80)
+
 ; CHECK: declare noundef i32 @rename(ptr nocapture noundef readonly, ptr nocapture noundef readonly) [[NOFREE_NOUNWIND]]
 declare i32 @rename(ptr, ptr)
 
diff --git a/llvm/test/Transforms/InstCombine/fdim.ll b/llvm/test/Transforms/InstCombine/fdim.ll
new file mode 100644
index 00000000000000..6213262f1af1e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdim.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define double @fdim_double() {
+; CHECK-LABEL: define double @fdim_double() {
+; CHECK-NEXT:    ret double 2.500000e+00
+;
+  %dim = call double @fdim (double 10.5, double 8.0)
+  ret double %dim
+}
+
+define double @fdim_double1() {
+; CHECK-LABEL: define double @fdim_double1() {
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+  %dim = call double @fdim (double 7.0, double 8.0)
+  ret double %dim
+}
+
+define float @fdim_float() {
+; CHECK-LABEL: define float @fdim_float() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %dim = call float @fdimf (float 10.5, float 8.0)
+  ret float %dim
+}
+
+define float @fdim_float1() {
+; CHECK-LABEL: define float @fdim_float1() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %dim = call float @fdimf (float 7.0, float 8.0)
+  ret float %dim
+}
+
+declare double @fdim(double, double)
+declare float @fdimf(float, float)
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 47aeb0ad8fdef9..767a962402b783 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,7 +34,7 @@
 #
 # CHECK: << Total TLI yes SDK no:  18
 # CHECK: >> Total TLI no  SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 250
+# CHECK: == Total TLI yes SDK yes: 253
 #
 # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
 # WRONG_DETAIL: >> TLI no  SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
@@ -48,14 +48,14 @@
 # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
 # WRONG_SUMMARY: << Total TLI yes SDK no:  19{{$}}
 # WRONG_SUMMARY: >> Total TLI no  SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 249
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 252
 #
 ## The -COUNT suffix doesn't care if there are too many matches, so check
 ## the exact count first; the two directives should add up to that.
 ## Yes, this means additions to TLI will fail this test, but the argument
 ## to -COUNT can't be an expression.
-# AVAIL: TLI knows 501 symbols, 268 available
-# AVAIL-COUNT-268: {{^}} available
+# AVAIL: TLI knows 504 symbols, 271 available
+# AVAIL-COUNT-271: {{^}} available
 # AVAIL-NOT:       {{^}} available
 # UNAVAIL-COUNT-233: not available
 # UNAVAIL-NOT:       not available
@@ -814,6 +814,18 @@ DynamicSymbols:
     Type:            STT_FUNC
     Section:         .text
     Binding:         STB_GLOBAL
+  - Name:            fdim
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+  - Name:            fdimf
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+  - Name:            fdiml
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
   - Name:            rewind
     Type:            STT_FUNC
     Section:         .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index c081c44ed35d00..ed6a91ce611acf 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -317,6 +317,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare double @remquo(double, double, ptr)\n"
       "declare float @remquof(float, float, ptr)\n"
       "declare x86_fp80 @remquol(x86_fp80, x86_fp80, ptr)\n"
+      "declare double @fdim(double, double)\n"
+      "declare float @fdimf(float, float)\n"
+      "declare x86_fp80 @fdiml(x86_fp80, x86_fp80)\n"
       "declare i32 @rename(i8*, i8*)\n"
       "declare void @rewind(%struct*)\n"
       "declare double @rint(double)\n"

dtcxzyw · 2024-09-19T07:56:39Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  case APFloat::opStatus::opOverflow:
+    return ConstantFP::get(
+        CI->getType(),
+        APFloat::getLargest(X->getSemantics(), /*Negative=*/false));


The result should be inf. Please add a tests for fdim(1e308, -1e308).

BTW, it would be better to use fmax(x - y, 0) instead of handling all corner cases yourself.

yes, I read that on cppref but wasn't sure if it will handle all corner cases,
will change to that

The special cases also set errno, in which case we cannot perform the fold unless the call is memory(none)

i changed the fold so that it converts fdim(x, y) to fmax(x-y, 0)
handling corner cases should not be needed now

dtcxzyw · 2024-09-19T07:58:16Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  case APFloat::opStatus::opUnderflow:
+    return ConstantFP::get(
+        CI->getType(),
+        APFloat::getLargest(X->getSemantics(), /*Negative=*/true));


It is incorrect. Underflow means the result is subnormal.

arsenm · 2024-09-20T04:51:57Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  case APFloat::opStatus::opOverflow:
+    return ConstantFP::get(
+        CI->getType(),
+        APFloat::getLargest(X->getSemantics(), /*Negative=*/false));


The special cases also set errno, in which case we cannot perform the fold unless the call is memory(none)

arsenm · 2024-09-20T04:52:45Z

llvm/test/Transforms/InstCombine/fdim.ll

+  %dim = call float @fdimf (float 7.0, float 8.0)
+  ret float %dim
+}
+


Needs tests for all the edge cases. Can you also test poison/undef arguments in each position

Added test,
let me know if there are still some edge cases to be covered

arsenm · 2024-09-20T04:52:51Z

llvm/test/Transforms/InstCombine/fdim.ll

+; CHECK-LABEL: define double @fdim_double1() {
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+  %dim = call double @fdim (double 7.0, double 8.0)


Extra space before ( in all functions

braw-lee · 2024-09-25T10:38:44Z

llvm/test/Transforms/InstCombine/win-fdim.ll

+  ret double %dim
+}
+
+define float @fdim_float() {


@arsenm
this test should fail because windows should not be able to fold @fdimf, right?

Yes, the call should remain

github-actions · 2024-09-25T10:39:46Z

✅ With the latest revision this PR passed the C/C++ code formatter.

arsenm · 2024-09-25T10:39:37Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  if (X->isNaN() || Y->isNaN())
+    return ConstantFP::getQNaN(CI->getType());


This should prefer to preserve the payload, so return X if X is nan or Y if Y is nan

arsenm · 2024-09-25T10:40:24Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // set no-NaN fast-math-flag as we already checked for NaN for both operands
+  FMF.setNoNaNs();


This is incorrect, you only know it's not a literal nan. It could still be a nan value. If you need this, you want isKnownNeverNaN

arsenm · 2024-09-25T10:40:55Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // set no-signed-zeroes as fdim will never return -0.0
+  FMF.setNoSignedZeros();


No signed zeroes does not mean the result will never be -0, you should not just introduce this flag

arsenm · 2024-09-25T10:41:09Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  FMF.setNoNaNs();
+  // set no-signed-zeroes as fdim will never return -0.0
+  FMF.setNoSignedZeros();
+  B.setFastMathFlags(FMF);


You can only preserve the fast math flags that were present on the original call

arsenm · 2024-09-25T10:41:45Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  Intrinsic::ID IID = Intrinsic::maxnum;
+  return copyFlags(
+      *CI, B.CreateBinaryIntrinsic(
+               IID, B.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)),


I believe you can also copy the flags onto the fsub

arsenm · 2024-09-25T10:44:34Z

llvm/test/Transforms/InstCombine/win-fdim.ll

+  ret double %dim
+}
+
+define float @fdim_float() {


Yes, the call should remain

arsenm · 2024-09-25T10:45:05Z

llvm/test/Transforms/InstCombine/fdim.ll

+; CHECK-NEXT:    [[DIM:%.*]] = call double @fdim(double poison, double 1.000000e+00)
+; CHECK-NEXT:    ret double [[DIM]]
+;
+  %dim = call double @fdim(double poison, double 1.0)


This can fold to poison

i tried using maximum intrinsic, it folds
fdim(poison, 1) -> zero
fdim(undef, 1) -> 0x7FF8000000000000
fdim(poison, poison) -> zero
fdim(undef, undef) -> zero

fdim and maximum should propagate poison, something is missing somewhere

arsenm · 2024-09-25T10:45:12Z

llvm/test/Transforms/InstCombine/fdim.ll

+; CHECK-NEXT:    [[DIM:%.*]] = call double @fdim(double 1.000000e+00, double poison)
+; CHECK-NEXT:    ret double [[DIM]]
+;
+  %dim = call double @fdim(double 1.0, double poison)


This can fold to poison

m_APFloat() would not match against undef/poison values
so i used m_Undef() to find any undef/poison values, and returned them

arsenm · 2024-09-25T10:48:25Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // set no-signed-zeroes as fdim will never return -0.0
+  FMF.setNoSignedZeros();
+  B.setFastMathFlags(FMF);
+  // fdim is equivalent to fmax(x - y, 0), except for the NaN handling requirements. 


If I read this correctly, this wants to propagate nans instead of the return non-nan operand behavior. Can this just unconditionally use Intrinsic::maximum instead?

this was explicitly mentioned in cppref notes https://en.cppreference.com/w/cpp/numeric/math/fdim
so i assumed maximum intrinsic does not handles this

documentation for maxnum says the same
https://llvm.org/docs/LangRef.html#llvm-maxnum-intrinsic
"If either operand is a NaN, returns the other non-NaN operand. Returns NaN only if both operands are NaN. If the operands compare equal, returns either one of the operands. For example, this means that fmax(+0.0, -0.0) returns either -0.0 or 0.0."

Maximum, not maxnum. https://llvm.org/docs/LangRef.html#llvm-maximum-intrinsic, which is also fmaximum in C23

maximum also has stricter signed 0 behavior than maxnum

oh, i misread maxnum
yeah, maximum intrinsic actually fits the box here, thanks for the pointer

I would suggest trying to use maxnum if the call is nnan, and if not just use maximum

Actually maybe just use maximum, the nnan case can transform later if needed. I'm more wondering if it is OK to add nsz to specify the relaxed zero requirement

arsenm · 2024-09-25T10:49:19Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // Check if both values are constants
+  if (!match(CI->getArgOperand(0), m_APFloat(X)) ||
+      !match(CI->getArgOperand(1), m_APFloat(Y)))
+    return nullptr;


You don't do anything with the constant values, besides check if they are nan (which isn't strong enough to do this transform)

not sure what you mean by that,
we need the NaN check and i don't see if we can do that through a Value*, we need to transform it to constant
can you suggest any alternatives here?

You need to know a dynamic value cannot be nan, which isKnownNeverNaN can do

spavloff · 2024-09-27T06:59:00Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  IRBuilderBase::FastMathFlagGuard Guard(B);
+  FastMathFlags FMF = CI->getFastMathFlags();
+  // set no-NaN fast-math-flag as we already checked for NaN for both operands
+  FMF.setNoNaNs();


Even if the operands are not NaNs, the operation still can produce it, see fdim(+inf, +inf).

andykaylor · 2024-09-27T18:26:41Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // set no-signed-zeroes as fdim will never return -0.0
+  FMF.setNoSignedZeros();
+  B.setFastMathFlags(FMF);
+  // fdim is equivalent to fmax(x - y, 0), except for the NaN handling


I think you can only do this if you know that math errno isn't being used. fdim sets errno on range errors, but the fmax intrinsic does not. An example that shows this is fdim(1e308, -1e308);

correct, I am thinking of abandoning fdim -> fmax, as this PR is for constant propogation
and i thougth fdim->fmax would mean i won't have to handle corner cases manually

I think it is better to just transform into the more understood ops

I agree it would be better to transform to fsub+fmax, if you can be sure errno doesn't need to be set. I suppose the "memory(none)" attribute establishes that, as you noted in another commented. A case could be made that the 'afn' fast-math flag would also allow it.

ok, so i added checks so the fold is done only when fsub return ok and fdim has memory(none) attribute

poison and undefs are not getting propogated, will have to look into that

arsenm · 2024-10-07T16:38:36Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  if (X->isNaN())
+    return ConstantFP::get(CI->getType(), *X);
+  if (Y->isNaN())
+    return ConstantFP::get(CI->getType(), *Y);


Should makeQuiet the nan, in case it is signaling

arsenm · 2024-10-07T16:38:56Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  APFloat::opStatus Status =
+      Difference.subtract(*Y, RoundingMode::NearestTiesToEven);
+  switch (Status) {
+  case APFloat::opStatus::opOK:


It's fine if rounding happened, it would happen anyway (I guess you could check this to handle the strictfp case)

arsenm · 2024-10-07T16:39:25Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+
+  const APFloat *X, *Y;
+  // Check if both values are constants
+  if (!match(CI->getArgOperand(0), m_APFloat(X)) ||


Don't need to check the constants if you are doing the general expansion

but this is just a constant fold right?
also APFloat* is used later to return NaN values, check status of fsub operation
those wouldn't be possible with Value*

btw, sry, this PR is taking too long, I am new to LLVM and the codebase is too big ;)

No problem!

You didn't directly implement this as constant folding. It's almost a general case replacement of the call with a short sequence. A pure constant fold would be done entirely with APFloat, rather than attempting to create intermediate instructions.

It's probably a good idea to leave the general case for a follow up PR, since there are cost considerations (more targets have maxnum-like operations), and last I checked maximum was poorly supported by most backends.

arsenm · 2024-10-07T16:39:47Z

llvm/test/Transforms/InstCombine/fdim.ll

+
+declare double @fdim(double, double) #0
+declare float @fdimf(float, float) #0
+


Add some strictfp tests

lib calls don't fold for strictfp cases, added test

arsenm · 2024-10-08T20:54:03Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // propogate poison/undef if any
+  if (match(CI->getArgOperand(0), m_Undef()))
+    return CI->getArgOperand(0);
+  if (match(CI->getArgOperand(1), m_Undef()))
+    return CI->getArgOperand(1);


This isn't quite right. You can propagate poison -> poison, but undef needs more constraints (usually undef gets folded to qnan)

Typo propogate

arsenm · 2024-10-08T20:56:11Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  if (match(CI->getArgOperand(0), m_Undef()))
+    return CI->getArgOperand(0);
+  if (match(CI->getArgOperand(1), m_Undef()))
+    return CI->getArgOperand(1);


Suggested change

if (match(CI->getArgOperand(0), m_Undef()))

return CI->getArgOperand(0);

if (match(CI->getArgOperand(1), m_Undef()))

return CI->getArgOperand(1);

if (isa<PoisonValue>(CI->getArgOperand(0))

return CI->getArgOperand(0);

if (isa<PoisonValue>(CI->getArgOperand(1))

return CI->getArgOperand(1);

This isn't quite right. You can propagate poison -> poison, but undef needs more constraints (usually undef gets folded to qnan). For simplicity you can just handle poison

arsenm · 2024-10-08T20:58:36Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+
+  const APFloat *X, *Y;
+  // Check if both values are constants
+  if (!match(CI->getArgOperand(0), m_APFloat(X)) ||


No problem!

You didn't directly implement this as constant folding. It's almost a general case replacement of the call with a short sequence. A pure constant fold would be done entirely with APFloat, rather than attempting to create intermediate instructions.

It's probably a good idea to leave the general case for a follow up PR, since there are cost considerations (more targets have maxnum-like operations), and last I checked maximum was poorly supported by most backends.

arsenm · 2024-10-08T21:01:59Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  auto *FSubCall =
+      copyFlags(*CI, B.CreateFSub(CI->getOperand(0), CI->getOperand(1)));


You did the subtract above, but then throw it away to create fsub which will constant fold.

If you want to only handle this as constant folding, you should use the APFloat Difference, and then use APFloat version of maximum.

Something like minimum(Difference, APFloat::getZero())

arsenm · 2024-10-09T15:04:41Z

llvm/test/Transforms/InstCombine/win-fdim.ll

+declare float @fdimf(float, float) #0
+declare fp128 @fdiml(fp128, fp128) #0
+
+attributes #0 = { memory(none)}


Suggested change

attributes #0 = { memory(none)}

attributes #0 = { memory(none) }

arsenm · 2024-10-09T15:08:13Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+    return nullptr;
+
+  // TODO : Handle undef values
+  // propagate poison if any


Suggested change

// propagate poison if any

// Propagate poison if any

arsenm · 2024-10-09T15:09:24Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  APFloat Difference = *X;
+  APFloat::opStatus Status =
+      Difference.subtract(*Y, RoundingMode::NearestTiesToEven);
+  switch (Status) {


Don't need to check the status

arsenm · 2024-10-09T15:11:49Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  if (Y->isNaN())
+    return ConstantFP::get(CI->getType(), Y->makeQuiet());
+
+  // if X - Y overflows, it will set the errno, so we avoid the fold


You excluded writing errno above with the doesNotAccessMemory check. Either don't need the status check, or can try to handle the writing errno known-no-overflow case

arsenm · 2024-10-09T15:12:07Z

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+  // If either argument is NaN, NaN is returned
+  if (X->isNaN())
+    return ConstantFP::get(CI->getType(), X->makeQuiet());
+  if (Y->isNaN())
+    return ConstantFP::get(CI->getType(), Y->makeQuiet());


You could actually just pass this through to maximum and I think you'll get the same result

arsenm · 2024-10-09T15:12:49Z

llvm/test/Transforms/InstCombine/fdim.ll

+
+declare double @fdim(double, double) #0
+declare float @fdimf(float, float) #0
+


I think the nan tests are still missing

added nan tests
0x7FF8000000000000 is the NAN value right?

It's a (quiet) nan value. If you want to be very comprehensive you can check payloads and snan quieting

added tests for snan
let me know if anything else is needed

Signed-off-by: Kushal Pal <kushalpal109@gmail.com>

2nd PR to fix llvm#108695 based on llvm#108702 --------- Signed-off-by: Kushal Pal <kushalpal109@gmail.com>

llvmbot added llvm:analysis llvm:transforms labels Sep 19, 2024

dtcxzyw requested a review from arsenm September 19, 2024 07:50

dtcxzyw reviewed Sep 19, 2024

View reviewed changes

arsenm added the floating-point Floating-point math label Sep 20, 2024

arsenm reviewed Sep 20, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch from 53989aa to 04799f4 Compare September 25, 2024 10:36

nikic changed the title ~~Fdim constant fold~~ [SimplifyLibCalls] fdim constant fold Sep 25, 2024

braw-lee commented Sep 25, 2024

View reviewed changes

arsenm requested changes Sep 25, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch from 04799f4 to 8f502a7 Compare September 25, 2024 13:23

arsenm requested review from spavloff, andykaylor, kpneal and jcranmer-intel September 25, 2024 13:26

spavloff reviewed Sep 27, 2024

View reviewed changes

andykaylor reviewed Sep 27, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch from 8f502a7 to a1cc021 Compare October 7, 2024 07:50

arsenm reviewed Oct 7, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch 2 times, most recently from 699a81b to 5d907ac Compare October 8, 2024 06:01

arsenm reviewed Oct 8, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch from 5d907ac to a67ba80 Compare October 9, 2024 13:47

arsenm reviewed Oct 9, 2024

View reviewed changes

braw-lee force-pushed the fdim_constant_fold branch from ba372c9 to deb5ec9 Compare October 10, 2024 06:26

braw-lee added 2 commits October 10, 2024 14:37

[SimplifyLibCalls] Constant fold fdim

672ab25

Signed-off-by: Kushal Pal <kushalpal109@gmail.com>

[SimplifyLibCalls] Added test for lib calls on win

1987c69

Signed-off-by: Kushal Pal <kushalpal109@gmail.com>

braw-lee force-pushed the fdim_constant_fold branch from deb5ec9 to 1987c69 Compare October 10, 2024 09:08

arsenm approved these changes Oct 10, 2024

View reviewed changes

arsenm merged commit 3645c64 into llvm:main Oct 10, 2024
8 checks passed

DanielCChen pushed a commit to DanielCChen/llvm-project that referenced this pull request Oct 16, 2024

[SimplifyLibCalls] fdim constant fold (llvm#109235)

fa8c108

2nd PR to fix llvm#108695 based on llvm#108702 --------- Signed-off-by: Kushal Pal <kushalpal109@gmail.com>

		if (X->isNaN() \|\| Y->isNaN())
		return ConstantFP::getQNaN(CI->getType());

		// set no-NaN fast-math-flag as we already checked for NaN for both operands
		FMF.setNoNaNs();

		// set no-signed-zeroes as fdim will never return -0.0
		FMF.setNoSignedZeros();


		declare double @fdim(double, double) #0
		declare float @fdimf(float, float) #0

		auto *FSubCall =
		copyFlags(*CI, B.CreateFSub(CI->getOperand(0), CI->getOperand(1)));

	attributes #0 = { memory(none)}
	attributes #0 = { memory(none) }

[SimplifyLibCalls] fdim constant fold #109235

[SimplifyLibCalls] fdim constant fold #109235

Conversation

braw-lee commented Sep 19, 2024

llvmbot commented Sep 19, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

github-actions bot commented Sep 25, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

braw-lee Sep 25, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

arsenm Oct 10, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

llvmbot commented Sep 19, 2024 •

edited

Loading

github-actions bot commented Sep 25, 2024 •

edited

Loading

braw-lee Sep 25, 2024 •

edited

Loading

arsenm Oct 10, 2024 •

edited

Loading