[libc][math][c23] Add f16{add,sub}f C23 math functions (#96787)

Part of #93566.
llvm · Jul 2, 2024 · 12a1e6d · 12a1e6d
1 parent 5b77ed4
commit 12a1e6d
Show file tree

Hide file tree

Showing 24 changed files with 934 additions and 1 deletion.
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -508,11 +508,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.f16addf
     libc.src.math.f16div
     libc.src.math.f16divf
     libc.src.math.f16fmaf
     libc.src.math.f16sqrt
     libc.src.math.f16sqrtf
+    libc.src.math.f16subf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
     libc.src.math.floorf16

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -538,6 +538,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.f16addf
     libc.src.math.f16div
     libc.src.math.f16divf
     libc.src.math.f16divl
@@ -547,6 +548,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.f16sqrt
     libc.src.math.f16sqrtf
     libc.src.math.f16sqrtl
+    libc.src.math.f16subf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
     libc.src.math.floorf16

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -124,10 +124,14 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | dsub             | N/A              | N/A             |                        | N/A                  |                        | 7.12.14.2              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
+| f16add           | |check|\*        |                 |                        | N/A                  |                        | 7.12.14.1              | F.10.11                    |
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16div           | |check|\*        | |check|\*       | |check|\*              | N/A                  | |check|                | 7.12.14.4              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16fma           | |check|          | |check|         | |check|                | N/A                  | |check|                | 7.12.14.5              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
+| f16sub           | |check|\*        |                 |                        | N/A                  |                        | 7.12.14.2              | F.10.11                    |
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fabs             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.7.3               | F.10.4.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fadd             | N/A              |                 |                        | N/A                  |                        | 7.12.14.1              | F.10.11                    |

diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td
@@ -57,6 +57,10 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> {
       [], // Types
       [], // Enumerations
       [
+          GuardedFunctionSpec<"f16addf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+
+          GuardedFunctionSpec<"f16subf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+
           GuardedFunctionSpec<"f16div", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16divf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16divl", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,

diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -49,6 +49,25 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  add_sub
+  HDRS
+    add_sub.h
+  DEPENDS
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.CPP.algorithm
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.attributes
+    libc.src.__support.macros.optimization
+)
+
 add_header_library(
   div
   HDRS

diff --git a/libc/src/__support/FPUtil/generic/add_sub.h b/libc/src/__support/FPUtil/generic/add_sub.h
@@ -0,0 +1,206 @@
+//===-- Add and subtract IEEE 754 floating-point numbers --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_ADD_SUB_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_ADD_SUB_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE::fputil::generic {
+
+// Computes x + y (IsSub == false) or x - y (IsSub == true) for two InType
+// operands and returns the result converted to OutType, which must not be
+// wider than InType.
+//
+// Special operands are resolved first:
+//   - a NaN operand yields a quiet NaN; FE_INVALID is raised for signaling
+//     NaNs, and a quiet NaN's payload is kept when it fits in OutType;
+//   - infinities of opposite effective sign raise FE_INVALID and yield NaN,
+//     otherwise the infinite operand (negated for a subtracted y) is returned;
+//   - a zero operand returns the other operand, or a correctly signed zero
+//     when both operands are zero.
+// All remaining inputs are combined as integer mantissas extended by
+// GUARD_BITS_LEN guard bits (the lowest one acting as a sticky bit) and are
+// converted by DyadicFloat with exception signaling requested.
+template <bool IsSub, typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                                 cpp::is_floating_point_v<InType> &&
+                                 sizeof(OutType) <= sizeof(InType),
+                             OutType>
+add_or_sub(InType x, InType y) {
+  using OutFPBits = FPBits<OutType>;
+  using OutStorageType = typename OutFPBits::StorageType;
+  using InFPBits = FPBits<InType>;
+  using InStorageType = typename InFPBits::StorageType;
+
+  constexpr int GUARD_BITS_LEN = 3;
+  constexpr int RESULT_FRACTION_LEN = InFPBits::FRACTION_LEN + GUARD_BITS_LEN;
+  constexpr int RESULT_MANTISSA_LEN = RESULT_FRACTION_LEN + 1;
+
+  using DyadicFloat =
+      DyadicFloat<cpp::bit_ceil(static_cast<size_t>(RESULT_MANTISSA_LEN))>;
+
+  InFPBits x_bits(x);
+  InFPBits y_bits(y);
+
+  // Sign of the second term as it enters the operation: sign(y) for an
+  // addition, the opposite of sign(y) for a subtraction.
+  Sign y_effective_sign = (y_bits.is_pos() != IsSub) ? Sign::POS : Sign::NEG;
+
+  // True when the magnitudes are summed, false when they are subtracted.
+  bool is_effectively_add = x_bits.sign() == y_effective_sign;
+
+  if (LIBC_UNLIKELY(x_bits.is_inf_or_nan() || y_bits.is_inf_or_nan() ||
+                    x_bits.is_zero() || y_bits.is_zero())) {
+    if (x_bits.is_nan() || y_bits.is_nan()) {
+      if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
+        raise_except_if_required(FE_INVALID);
+
+      // Propagate a quiet NaN's payload only if it is representable in the
+      // (possibly narrower) output format; x takes precedence over y.
+      if (x_bits.is_quiet_nan()) {
+        InStorageType x_payload = static_cast<InStorageType>(getpayload(x));
+        if ((x_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(x_bits.sign(),
+                                      static_cast<OutStorageType>(x_payload))
+              .get_val();
+      }
+
+      if (y_bits.is_quiet_nan()) {
+        InStorageType y_payload = static_cast<InStorageType>(getpayload(y));
+        if ((y_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(y_bits.sign(),
+                                      static_cast<OutStorageType>(y_payload))
+              .get_val();
+      }
+
+      return OutFPBits::quiet_nan().get_val();
+    }
+
+    if (x_bits.is_inf()) {
+      // inf - inf (after accounting for IsSub) is an invalid operation.
+      if (y_bits.is_inf() && !is_effectively_add) {
+        raise_except_if_required(FE_INVALID);
+        return OutFPBits::quiet_nan().get_val();
+      }
+
+      return OutFPBits::inf(x_bits.sign()).get_val();
+    }
+
+    // x is finite here, so an infinite y decides the result. A subtracted
+    // infinity flips sign: x - (+inf) is -inf.
+    if (y_bits.is_inf())
+      return OutFPBits::inf(y_effective_sign).get_val();
+
+    if (x_bits.is_zero()) {
+      if (y_bits.is_zero()) {
+        // Zeros with the same effective sign keep that sign, e.g.
+        // (-0) + (-0) is -0 and (-0) - (+0) is -0.
+        if (is_effectively_add)
+          return OutFPBits::zero(x_bits.sign()).get_val();
+
+        // Zeros of opposite effective sign cancel exactly: the result is -0
+        // when rounding downward and +0 otherwise.
+        switch (quick_get_round()) {
+        case FE_DOWNWARD:
+          return OutFPBits::zero(Sign::NEG).get_val();
+        default:
+          return OutFPBits::zero(Sign::POS).get_val();
+        }
+      }
+
+      // volatile prevents Clang from converting tmp to OutType and then
+      // immediately back to InType before negating it, resulting in double
+      // rounding.
+      volatile InType tmp = y;
+      if constexpr (IsSub)
+        tmp = -tmp;
+      return static_cast<OutType>(tmp);
+    }
+
+    // Only y is zero: x is finite and non-zero, so both x + 0 and x - 0 are x
+    // converted to OutType.
+    if (y_bits.is_zero())
+      return static_cast<OutType>(x);
+  }
+
+  InType x_abs = x_bits.abs().get_val();
+  InType y_abs = y_bits.abs().get_val();
+
+  // Exact cancellation of equal magnitudes: same zero-sign rule as above.
+  if (x_abs == y_abs && !is_effectively_add) {
+    switch (quick_get_round()) {
+    case FE_DOWNWARD:
+      return OutFPBits::zero(Sign::NEG).get_val();
+    default:
+      return OutFPBits::zero(Sign::POS).get_val();
+    }
+  }
+
+  // The result takes the sign of the term with the larger magnitude. When the
+  // magnitudes are equal the operation is an effective addition here, so both
+  // terms share x's sign.
+  Sign result_sign = x_bits.sign();
+  if (x_abs < y_abs)
+    result_sign = y_effective_sign;
+
+  InFPBits max_bits(cpp::max(x_abs, y_abs));
+  InFPBits min_bits(cpp::min(x_abs, y_abs));
+
+  InStorageType result_mant;
+
+  if (max_bits.is_subnormal()) {
+    // min_bits must be subnormal too, so both mantissas already share the
+    // same scale and can be combined directly.
+
+    if (is_effectively_add)
+      result_mant = max_bits.get_mantissa() + min_bits.get_mantissa();
+    else
+      result_mant = max_bits.get_mantissa() - min_bits.get_mantissa();
+
+    result_mant <<= GUARD_BITS_LEN;
+  } else {
+    InStorageType max_mant = max_bits.get_explicit_mantissa() << GUARD_BITS_LEN;
+    InStorageType min_mant = min_bits.get_explicit_mantissa() << GUARD_BITS_LEN;
+
+    // A subnormal operand stores a biased exponent of 0 but is scaled like a
+    // biased exponent of 1, so adjust it before computing the shift that
+    // aligns min_mant with max_mant.
+    int min_biased_exp = min_bits.get_biased_exponent();
+    if (min_bits.is_subnormal())
+      ++min_biased_exp;
+    int alignment = max_bits.get_biased_exponent() - min_biased_exp;
+
+    InStorageType aligned_min_mant =
+        min_mant >> cpp::min(alignment, RESULT_MANTISSA_LEN);
+
+    // The sticky bit records whether any non-zero bit of min_mant was shifted
+    // out during alignment.
+    bool aligned_min_mant_sticky;
+
+    if (alignment <= GUARD_BITS_LEN) {
+      // Only the (zero) guard bits were shifted out.
+      aligned_min_mant_sticky = false;
+    } else if (alignment <= InFPBits::FRACTION_LEN + GUARD_BITS_LEN) {
+      // Keep just the shifted-out low bits. The cast discards bits that
+      // integer promotion would otherwise retain for narrow storage types.
+      aligned_min_mant_sticky =
+          static_cast<InStorageType>(
+              min_mant << (InFPBits::STORAGE_LEN - alignment)) != 0;
+    } else {
+      // The whole non-zero mantissa was shifted out.
+      aligned_min_mant_sticky = true;
+    }
+
+    InStorageType min_term =
+        aligned_min_mant | static_cast<InStorageType>(aligned_min_mant_sticky);
+
+    if (is_effectively_add)
+      result_mant = max_mant + min_term;
+    else
+      result_mant = max_mant - min_term;
+  }
+
+  int result_exp = max_bits.get_exponent() - RESULT_FRACTION_LEN;
+  DyadicFloat result(result_sign, result_exp, result_mant);
+  return result.template as<OutType, /*ShouldSignalExceptions=*/true>();
+}
+
+// Returns x + y as OutType by forwarding to add_or_sub with subtraction
+// disabled.
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                                 cpp::is_floating_point_v<InType> &&
+                                 sizeof(OutType) <= sizeof(InType),
+                             OutType>
+add(InType x, InType y) {
+  constexpr bool IS_SUB = false;
+  return add_or_sub<IS_SUB, OutType>(x, y);
+}
+
+// Returns x - y as OutType by forwarding to add_or_sub with subtraction
+// enabled.
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                                 cpp::is_floating_point_v<InType> &&
+                                 sizeof(OutType) <= sizeof(InType),
+                             OutType>
+sub(InType x, InType y) {
+  constexpr bool IS_SUB = true;
+  return add_or_sub<IS_SUB, OutType>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE::fputil::generic
+
+#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_ADD_SUB_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f)
 add_math_entrypoint_object(expm1)
 add_math_entrypoint_object(expm1f)
 
+add_math_entrypoint_object(f16addf)
+
 add_math_entrypoint_object(f16div)
 add_math_entrypoint_object(f16divf)
 add_math_entrypoint_object(f16divl)
@@ -114,6 +116,8 @@ add_math_entrypoint_object(f16sqrtf)
 add_math_entrypoint_object(f16sqrtl)
 add_math_entrypoint_object(f16sqrtf128)
 
+add_math_entrypoint_object(f16subf)
+
 add_math_entrypoint_object(fabs)
 add_math_entrypoint_object(fabsf)
 add_math_entrypoint_object(fabsl)

diff --git a/libc/src/math/f16addf.h b/libc/src/math/f16addf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16addf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16ADDF_H
+#define LLVM_LIBC_SRC_MATH_F16ADDF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16addf(float x, float y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16ADDF_H
diff --git a/libc/src/math/f16subf.h b/libc/src/math/f16subf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16subf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16SUBF_H
+#define LLVM_LIBC_SRC_MATH_F16SUBF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16subf(float x, float y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16SUBF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -3795,6 +3795,32 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  f16addf
+  SRCS
+    f16addf.cpp
+  HDRS
+    ../f16addf.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.generic.add_sub
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  f16subf
+  SRCS
+    f16subf.cpp
+  HDRS
+    ../f16subf.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.generic.add_sub
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   f16div
   SRCS