llvm · lntue · Jun 25, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
@@ -107,7 +107,10 @@ template <size_t Bits> struct DyadicFloat {
       T d_hi =
           FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
               .get_val();
-      return T(2) * d_hi;
+      // volatile prevents constant propagation that would result in infinity
+      // always being returned no matter the current rounding mode.
+      volatile T two(2.0);
+      return two * d_hi;
     }
 
     bool denorm = false;
@@ -179,10 +182,18 @@ template <size_t Bits> struct DyadicFloat {
       output_bits_t clear_exp = static_cast<output_bits_t>(
           output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
       output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
+
       if (!(r_bits & FPBits<T>::EXP_MASK)) {
         // Output is denormal after rounding, clear the implicit bit for 80-bit
         // long double.
         r_bits -= IMPLICIT_MASK;
+
+        // TODO: IEEE Std 754-2019 lets implementers choose whether to check for
+        // "tininess" before or after rounding for base-2 formats, as long as
+        // the same choice is made for all operations. Our choice to check after
+        // rounding might not be the same as the hardware's.
+        if (round_and_sticky)
+          raise_except_if_required(FE_UNDERFLOW);
       }
 
       return FPBits<T>(r_bits).get_val();

diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -51,15 +51,13 @@ add_header_library(
   HDRS
     div.h
   DEPENDS
-    libc.hdr.errno_macros
     libc.hdr.fenv_macros
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.type_traits
     libc.src.__support.FPUtil.basic_operations
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.dyadic_float
-    libc.src.__support.FPUtil.rounding_mode
     libc.src.__support.macros.attributes
     libc.src.__support.macros.optimization
 )
diff --git a/libc/src/__support/FPUtil/generic/div.h b/libc/src/__support/FPUtil/generic/div.h
@@ -9,15 +9,13 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
 
-#include "hdr/errno_macros.h"
 #include "hdr/fenv_macros.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/BasicOperations.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/dyadic_float.h"
-#include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/optimization.h"
 
@@ -35,14 +33,6 @@ div(InType x, InType y) {
   using InStorageType = typename InFPBits::StorageType;
   using DyadicFloat =
       DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::FRACTION_LEN))>;
-  using DyadicMantissaType = typename DyadicFloat::MantissaType;
-
-  // +1 for the implicit bit.
-  constexpr int DYADIC_EXTRA_MANTISSA_LEN =
-      DyadicMantissaType::BITS - (InFPBits::FRACTION_LEN + 1);
-  // +1 for the extra fractional bit in q.
-  constexpr int Q_EXTRA_FRACTION_LEN =
-      InFPBits::FRACTION_LEN + 1 - OutFPBits::FRACTION_LEN;
 
   InFPBits x_bits(x);
   InFPBits y_bits(y);
@@ -104,120 +94,33 @@ div(InType x, InType y) {
   DyadicFloat xd(x);
   DyadicFloat yd(y);
 
-  bool would_q_be_subnormal = xd.mantissa < yd.mantissa;
-  int q_exponent = xd.get_unbiased_exponent() - yd.get_unbiased_exponent() -
-                   would_q_be_subnormal;
-
-  if (q_exponent + OutFPBits::EXP_BIAS >= OutFPBits::MAX_BIASED_EXPONENT) {
-    set_errno_if_required(ERANGE);
-    raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+  // Number of iterations = full output precision + 1 rounding bit + 1 potential
+  // leading 0.
+  constexpr size_t NUM_ITERS = OutFPBits::FRACTION_LEN + 3;
+  int result_exp = xd.exponent - yd.exponent - (NUM_ITERS - 1);
 
-    switch (get_round()) {
-    case FE_TONEAREST:
-      return OutFPBits::inf(result_sign).get_val();
-    case FE_DOWNWARD:
-      if (result_sign.is_pos())
-        return OutFPBits::max_normal(result_sign).get_val();
-      return OutFPBits::inf(result_sign).get_val();
-    case FE_UPWARD:
-      if (result_sign.is_pos())
-        return OutFPBits::inf(result_sign).get_val();
-      return OutFPBits::max_normal(result_sign).get_val();
-    default:
-      return OutFPBits::max_normal(result_sign).get_val();
-    }
-  }
+  InStorageType q = 0;
+  InStorageType r = static_cast<InStorageType>(xd.mantissa >> 2);
+  InStorageType yd_mant_in = static_cast<InStorageType>(yd.mantissa >> 1);
 
-  if (q_exponent < -OutFPBits::EXP_BIAS - OutFPBits::FRACTION_LEN) {
-    set_errno_if_required(ERANGE);
-    raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
-
-    switch (quick_get_round()) {
-    case FE_DOWNWARD:
-      if (result_sign.is_pos())
-        return OutFPBits::zero(result_sign).get_val();
-      return OutFPBits::min_subnormal(result_sign).get_val();
-    case FE_UPWARD:
-      if (result_sign.is_pos())
-        return OutFPBits::min_subnormal(result_sign).get_val();
-      return OutFPBits::zero(result_sign).get_val();
-    default:
-      return OutFPBits::zero(result_sign).get_val();
-    }
-  }
-
-  InStorageType q = 1;
-  InStorageType xd_mant_in = static_cast<InStorageType>(
-      xd.mantissa >> (DYADIC_EXTRA_MANTISSA_LEN - would_q_be_subnormal));
-  InStorageType yd_mant_in =
-      static_cast<InStorageType>(yd.mantissa >> DYADIC_EXTRA_MANTISSA_LEN);
-  InStorageType r = xd_mant_in - yd_mant_in;
-
-  for (size_t i = 0; i < InFPBits::FRACTION_LEN + 1; i++) {
+  for (size_t i = 0; i < NUM_ITERS; ++i) {
     q <<= 1;
-    InStorageType t = r << 1;
-    if (t < yd_mant_in) {
-      r = t;
-    } else {
+    r <<= 1;
+    if (r >= yd_mant_in) {
       q += 1;
-      r = t - yd_mant_in;
+      r -= yd_mant_in;
     }
   }
 
-  bool round;
-  bool sticky;
-  OutStorageType result;
-
-  if (q_exponent > -OutFPBits::EXP_BIAS) {
-    // Result is normal.
-
-    InStorageType round_mask = InStorageType(1) << (Q_EXTRA_FRACTION_LEN - 1);
-    round = (q & round_mask) != 0;
-    InStorageType sticky_mask = round_mask - 1;
-    sticky = (q & sticky_mask) != 0;
-
-    result = OutFPBits::create_value(
-                 result_sign,
-                 static_cast<OutStorageType>(q_exponent + OutFPBits::EXP_BIAS),
-                 static_cast<OutStorageType>(q >> Q_EXTRA_FRACTION_LEN))
-                 .uintval();
-
-  } else {
-    // Result is subnormal.
+  DyadicFloat result(result_sign, result_exp, q);
+  result.mantissa += r != 0;
 
-    // +1 because the leading bit is now part of the fraction.
-    int extra_fraction_len =
-        Q_EXTRA_FRACTION_LEN + 1 - q_exponent - OutFPBits::EXP_BIAS;
+  OutType output = static_cast<OutType>(result);
 
-    InStorageType round_mask = InStorageType(1) << (extra_fraction_len - 1);
-    round = (q & round_mask) != 0;
-    InStorageType sticky_mask = round_mask - 1;
-    sticky = (q & sticky_mask) != 0;
-
-    result = OutFPBits::create_value(
-                 result_sign, 0,
-                 static_cast<OutStorageType>(q >> extra_fraction_len))
-                 .uintval();
-  }
-
-  if (round || sticky)
-    raise_except_if_required(FE_INEXACT);
-
-  bool lsb = (result & 1) != 0;
-
-  switch (quick_get_round()) {
-  case FE_TONEAREST:
-    if (round && (lsb || sticky))
-      ++result;
-    break;
-  case FE_UPWARD:
-    ++result;
-    break;
-  default:
-    break;
-  }
+  if (test_except(FE_OVERFLOW | FE_UNDERFLOW) != 0)
+    set_errno_if_required(ERANGE);
 
-  return cpp::bit_cast<OutType>(result);
+  return output;
 }
 
 } // namespace LIBC_NAMESPACE::fputil::generic