-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc][math][c23] Add f16fma{,l,f128} C23 math function #96711
Conversation
@llvm/pr-subscribers-libc Author: OverMighty (overmighty) Changes: Part of #93566. Full diff: https://github.com/llvm/llvm-project/pull/96711.diff 19 Files Affected:
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index a875a17f06b3e..f798bf282bf5d 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -506,6 +506,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.ceilf16
libc.src.math.copysignf16
libc.src.math.f16divf
+ libc.src.math.f16fma
libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 34748ff5950ad..9d88cf2b60222 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -537,6 +537,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.ceilf16
libc.src.math.copysignf16
libc.src.math.f16divf
+ libc.src.math.f16fma
libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 95f450ab75960..30079e8410f19 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -126,7 +126,7 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| f16div | |check| | | | N/A | | 7.12.14.4 | F.10.11 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16fma | |check| | | | N/A | | 7.12.14.5 | F.10.11 |
+| f16fma | |check| | |check| | | N/A | | 7.12.14.5 | F.10.11 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fabs | |check| | |check| | |check| | |check| | |check| | 7.12.7.3 | F.10.4.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 651f49deef4c1..367e6b2887dbe 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -477,6 +477,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>]>,
+ GuardedFunctionSpec<"f16fma", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 900a7022c3868..0f27b79b059a3 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -227,3 +227,4 @@ add_header_library(
)
add_subdirectory(generic)
+add_subdirectory(generic_hardware)
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 33b2564bfa087..80af697903286 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -24,6 +24,7 @@ add_header_library(
libc.src.__support.CPP.bit
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
+ libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.rounding_mode
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 71b150758d419..40a99fc6ca62e 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -13,6 +13,7 @@
#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/big_int.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
@@ -106,8 +107,6 @@ LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
sizeof(OutType) <= sizeof(InType),
OutType>
fma(InType x, InType y, InType z) {
- using OutFPBits = fputil::FPBits<OutType>;
- using OutStorageType = typename OutFPBits::StorageType;
using InFPBits = fputil::FPBits<InType>;
using InStorageType = typename InFPBits::StorageType;
@@ -115,11 +114,7 @@ fma(InType x, InType y, InType z) {
constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
using TmpResultType = UInt<TMP_RESULT_LEN>;
-
- constexpr size_t EXTRA_FRACTION_LEN =
- TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
- constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
- (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
+ using DyadicFloat = DyadicFloat<TMP_RESULT_LEN>;
if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
return static_cast<OutType>(x * y + z);
@@ -182,7 +177,6 @@ fma(InType x, InType y, InType z) {
constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
z_mant <<= RESULT_MIN_LEN;
int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
- bool round_bit = false;
bool sticky_bits = false;
bool z_shifted = false;
@@ -221,85 +215,18 @@ fma(InType x, InType y, InType z) {
}
}
- OutStorageType result = 0;
- int r_exp = 0; // Unbiased exponent of the result
-
- int round_mode = fputil::quick_get_round();
-
- // Normalize the result.
- if (prod_mant != 0) {
- int lead_zeros = cpp::countl_zero(prod_mant);
- // Move the leading 1 to the most significant bit.
- prod_mant <<= lead_zeros;
- prod_lsb_exp -= lead_zeros;
- r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
- InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
-
- if (r_exp > 0) {
- // The result is normal. We will shift the mantissa to the right by the
- // amount of extra bits compared to the length of the explicit mantissa in
- // the output type. The rounding bit then becomes the highest bit that is
- // shifted out, and the following lower bits are merged into sticky bits.
- round_bit =
- (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
- sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
- result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
- } else {
- if (r_exp < -OutFPBits::FRACTION_LEN) {
- // The result is smaller than 1/2 of the smallest denormal number.
- sticky_bits = true; // since the result is non-zero.
- result = 0;
- } else {
- // The result is denormal.
- TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
- round_bit = (prod_mant & mask) != 0;
- sticky_bits |= (prod_mant & (mask - 1)) != 0;
- if (r_exp > -OutFPBits::FRACTION_LEN)
- result = static_cast<OutStorageType>(
- prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
- else
- result = 0;
- }
-
- r_exp = 0;
- }
- } else {
+ if (prod_mant == 0) {
// When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
// rounding downward and +0.0 for other rounding modes.
- if (round_mode == FE_DOWNWARD)
+ if (quick_get_round() == FE_DOWNWARD)
prod_sign = Sign::NEG;
else
prod_sign = Sign::POS;
}
- // Finalize the result.
- if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
- if ((round_mode == FE_TOWARDZERO) ||
- (round_mode == FE_UPWARD && prod_sign.is_neg()) ||
- (round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
- return OutFPBits::max_normal(prod_sign).get_val();
- }
- return OutFPBits::inf(prod_sign).get_val();
- }
-
- // Remove hidden bit and append the exponent field and sign bit.
- result = static_cast<OutStorageType>(
- (result & OutFPBits::FRACTION_MASK) |
- (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
- if (prod_sign.is_neg())
- result |= OutFPBits::SIGN_MASK;
-
- // Rounding.
- if (round_mode == FE_TONEAREST) {
- if (round_bit && (sticky_bits || ((result & 1) != 0)))
- ++result;
- } else if ((round_mode == FE_UPWARD && prod_sign.is_pos()) ||
- (round_mode == FE_DOWNWARD && prod_sign.is_neg())) {
- if (round_bit || sticky_bits)
- ++result;
- }
-
- return cpp::bit_cast<OutType>(result);
+ DyadicFloat result(prod_sign, prod_lsb_exp - InFPBits::EXP_BIAS, prod_mant);
+ result.mantissa |= sticky_bits;
+ return result.template as<OutType, /*ShouldSignalExceptions=*/true>();
}
} // namespace generic
diff --git a/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt b/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
new file mode 100644
index 0000000000000..a094d7f8a6f00
--- /dev/null
+++ b/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_header_library(
+ fma
+ HDRS
+ fma.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.macros.properties.cpu_features
+ FLAGS
+ FMA_OPT
+)
diff --git a/libc/src/__support/FPUtil/generic_hardware/fma.h b/libc/src/__support/FPUtil/generic_hardware/fma.h
new file mode 100644
index 0000000000000..f878728cd2de5
--- /dev/null
+++ b/libc/src/__support/FPUtil/generic_hardware/fma.h
@@ -0,0 +1,29 @@
+//===-- Generic hardware implementation of fused multiply-add ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
+#define LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
+
+#include "src/__support/common.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE::fputil::generic_hardware {
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+LIBC_INLINE float fma(float x, float y, float z) {
+ return __builtin_fmaf(x, y, z);
+}
+
+LIBC_INLINE double fma(double x, double y, double z) {
+ return __builtin_fma(x, y, z);
+}
+#endif // LIBC_TARGET_CPU_HAS_FMA
+
+} // namespace LIBC_NAMESPACE::fputil::generic_hardware
+
+#endif // LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 622914e4265c9..9683c526aee72 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -39,17 +39,17 @@ multiply_add(T x, T y, T z) {
#if defined(LIBC_TARGET_CPU_HAS_FMA)
// FMA instructions are available.
-#include "FMA.h"
+#include "src/__support/FPUtil/generic_hardware/fma.h"
namespace LIBC_NAMESPACE {
namespace fputil {
LIBC_INLINE float multiply_add(float x, float y, float z) {
- return fma<float>(x, y, z);
+ return generic_hardware::fma(x, y, z);
}
LIBC_INLINE double multiply_add(double x, double y, double z) {
- return fma<double>(x, y, z);
+ return generic_hardware::fma(x, y, z);
}
} // namespace fputil
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 711cbf8bbfdca..e0a59b33b8fc7 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -101,6 +101,7 @@ add_math_entrypoint_object(expm1f)
add_math_entrypoint_object(f16divf)
+add_math_entrypoint_object(f16fma)
add_math_entrypoint_object(f16fmaf)
add_math_entrypoint_object(f16sqrtf)
diff --git a/libc/src/math/f16fma.h b/libc/src/math/f16fma.h
new file mode 100644
index 0000000000000..d9505f88f37af
--- /dev/null
+++ b/libc/src/math/f16fma.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fma ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMA_H
+#define LLVM_LIBC_SRC_MATH_F16FMA_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fma(double x, double y, double z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMA_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index fc2024c89b5df..29a3cc79dd239 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3744,6 +3744,19 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ f16fma
+ SRCS
+ f16fma.cpp
+ HDRS
+ ../f16fma.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.fma
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
f16fmaf
SRCS
diff --git a/libc/src/math/generic/f16fma.cpp b/libc/src/math/generic/f16fma.cpp
new file mode 100644
index 0000000000000..10ee028c06930
--- /dev/null
+++ b/libc/src/math/generic/f16fma.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fma function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fma.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fma, (double x, double y, double z)) {
+ return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index ba588662f469e..ab3b155f0f92b 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1903,6 +1903,21 @@ add_fp_unittest(
libc.src.math.f16divf
)
+add_fp_unittest(
+ f16fma_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ f16fma_test.cpp
+ HDRS
+ FmaTest.h
+ DEPENDS
+ libc.src.math.f16fma
+ libc.src.stdlib.rand
+ libc.src.stdlib.srand
+)
+
add_fp_unittest(
f16fmaf_test
NEED_MPFR
diff --git a/libc/test/src/math/f16fma_test.cpp b/libc/test/src/math/f16fma_test.cpp
new file mode 100644
index 0000000000000..d684c4f304fbc
--- /dev/null
+++ b/libc/test/src/math/f16fma_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for f16fma ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fma.h"
+
+using LlvmLibcF16fmaTest = FmaTestTemplate<float16, double>;
+
+TEST_F(LlvmLibcF16fmaTest, SubnormalRange) {
+ test_subnormal_range(&LIBC_NAMESPACE::f16fma);
+}
+
+TEST_F(LlvmLibcF16fmaTest, NormalRange) {
+ test_normal_range(&LIBC_NAMESPACE::f16fma);
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index ee99fb96a52ce..21e52a917349c 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3644,6 +3644,18 @@ add_fp_unittest(
libc.src.math.f16divf
)
+add_fp_unittest(
+ f16fma_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ f16fma_test.cpp
+ HDRS
+ FmaTest.h
+ DEPENDS
+ libc.src.math.f16fma
+)
+
add_fp_unittest(
f16fmaf_test
SUITE
diff --git a/libc/test/src/math/smoke/f16fma_test.cpp b/libc/test/src/math/smoke/f16fma_test.cpp
new file mode 100644
index 0000000000000..2e46b5bdd4682
--- /dev/null
+++ b/libc/test/src/math/smoke/f16fma_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fma ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fma.h"
+
+LIST_NARROWING_FMA_TESTS(float16, double, LIBC_NAMESPACE::f16fma)
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 521c2658b327a..d1c814b6bf18f 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -977,6 +977,8 @@ explain_ternary_operation_one_output_error(Operation,
#ifdef LIBC_TYPES_HAS_FLOAT16
template void explain_ternary_operation_one_output_error(
Operation, const TernaryInput<float> &, float16, double, RoundingMode);
+template void explain_ternary_operation_one_output_error(
+ Operation, const TernaryInput<double> &, float16, double, RoundingMode);
#endif
template <typename InputType, typename OutputType>
@@ -1124,6 +1126,10 @@ template bool compare_ternary_operation_one_output(Operation,
const TernaryInput<float> &,
float16, double,
RoundingMode);
+template bool compare_ternary_operation_one_output(Operation,
+ const TernaryInput<double> &,
+ float16, double,
+ RoundingMode);
#endif
} // namespace internal
|
// FMA instructions are available. | ||
#include "FMA.h" | ||
#include "src/__support/FPUtil/generic_hardware/fma.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did this to fix a circular dependency where dyadic_float.h was including multiply_add.h -> FMA.h -> generic/FMA.h -> dyadic_float.h
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is the only place using it right now, maybe we can just use __builtin_fma(f) directly here and add a comment about the circular dependency.
Update FPUtil/CMakeLists.txt to fix circular dependency.
Change multiply_add.h to use FMA builtins directly.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/104/builds/936 Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/71/builds/932 Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/131/builds/913 Here is the relevant piece of the build log for reference:
|
Part of #93566.