Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc][math][c23] Add f16divf C23 math function #96131

Merged
merged 8 commits into from
Jun 25, 2024
13 changes: 12 additions & 1 deletion libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ template <size_t Bits> struct DyadicFloat {
T d_hi =
FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
.get_val();
return T(2) * d_hi;
// volatile prevents constant propagation that would result in infinity
// always being returned no matter the current rounding mode.
volatile T two(2.0);
return two * d_hi;
}

bool denorm = false;
Expand Down Expand Up @@ -179,10 +182,18 @@ template <size_t Bits> struct DyadicFloat {
output_bits_t clear_exp = static_cast<output_bits_t>(
output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;

if (!(r_bits & FPBits<T>::EXP_MASK)) {
// Output is denormal after rounding, clear the implicit bit for 80-bit
// long double.
r_bits -= IMPLICIT_MASK;

// TODO: IEEE Std 754-2019 lets implementers choose whether to check for
// "tininess" before or after rounding for base-2 formats, as long as
// the same choice is made for all operations. Our choice to check after
// rounding might not be the same as the hardware's.
if (round_and_sticky)
raise_except_if_required(FE_UNDERFLOW);
overmighty marked this conversation as resolved.
Show resolved Hide resolved
}

return FPBits<T>(r_bits).get_val();
Expand Down
2 changes: 0 additions & 2 deletions libc/src/__support/FPUtil/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,13 @@ add_header_library(
HDRS
div.h
DEPENDS
libc.hdr.errno_macros
libc.hdr.fenv_macros
libc.src.__support.CPP.bit
libc.src.__support.CPP.type_traits
libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.attributes
libc.src.__support.macros.optimization
)
131 changes: 17 additions & 114 deletions libc/src/__support/FPUtil/generic/div.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,13 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H

#include "hdr/errno_macros.h"
overmighty marked this conversation as resolved.
Show resolved Hide resolved
#include "hdr/fenv_macros.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/optimization.h"

Expand All @@ -35,14 +33,6 @@ div(InType x, InType y) {
using InStorageType = typename InFPBits::StorageType;
using DyadicFloat =
DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::FRACTION_LEN))>;
using DyadicMantissaType = typename DyadicFloat::MantissaType;

// +1 for the implicit bit.
constexpr int DYADIC_EXTRA_MANTISSA_LEN =
DyadicMantissaType::BITS - (InFPBits::FRACTION_LEN + 1);
// +1 for the extra fractional bit in q.
constexpr int Q_EXTRA_FRACTION_LEN =
InFPBits::FRACTION_LEN + 1 - OutFPBits::FRACTION_LEN;

InFPBits x_bits(x);
InFPBits y_bits(y);
Expand Down Expand Up @@ -104,120 +94,33 @@ div(InType x, InType y) {
DyadicFloat xd(x);
DyadicFloat yd(y);

bool would_q_be_subnormal = xd.mantissa < yd.mantissa;
int q_exponent = xd.get_unbiased_exponent() - yd.get_unbiased_exponent() -
would_q_be_subnormal;

if (q_exponent + OutFPBits::EXP_BIAS >= OutFPBits::MAX_BIASED_EXPONENT) {
set_errno_if_required(ERANGE);
raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
// Number of iterations = full output precision + 1 rounding bit + 1 potential
// leading 0.
constexpr size_t NUM_ITERS = OutFPBits::FRACTION_LEN + 3;
int result_exp = xd.exponent - yd.exponent - (NUM_ITERS - 1);

switch (get_round()) {
case FE_TONEAREST:
return OutFPBits::inf(result_sign).get_val();
case FE_DOWNWARD:
if (result_sign.is_pos())
return OutFPBits::max_normal(result_sign).get_val();
return OutFPBits::inf(result_sign).get_val();
case FE_UPWARD:
if (result_sign.is_pos())
return OutFPBits::inf(result_sign).get_val();
return OutFPBits::max_normal(result_sign).get_val();
default:
return OutFPBits::max_normal(result_sign).get_val();
}
}
InStorageType q = 0;
InStorageType r = static_cast<InStorageType>(xd.mantissa >> 2);
InStorageType yd_mant_in = static_cast<InStorageType>(yd.mantissa >> 1);

if (q_exponent < -OutFPBits::EXP_BIAS - OutFPBits::FRACTION_LEN) {
set_errno_if_required(ERANGE);
raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);

switch (quick_get_round()) {
case FE_DOWNWARD:
if (result_sign.is_pos())
return OutFPBits::zero(result_sign).get_val();
return OutFPBits::min_subnormal(result_sign).get_val();
case FE_UPWARD:
if (result_sign.is_pos())
return OutFPBits::min_subnormal(result_sign).get_val();
return OutFPBits::zero(result_sign).get_val();
default:
return OutFPBits::zero(result_sign).get_val();
}
}

InStorageType q = 1;
InStorageType xd_mant_in = static_cast<InStorageType>(
xd.mantissa >> (DYADIC_EXTRA_MANTISSA_LEN - would_q_be_subnormal));
InStorageType yd_mant_in =
static_cast<InStorageType>(yd.mantissa >> DYADIC_EXTRA_MANTISSA_LEN);
InStorageType r = xd_mant_in - yd_mant_in;

for (size_t i = 0; i < InFPBits::FRACTION_LEN + 1; i++) {
for (size_t i = 0; i < NUM_ITERS; ++i) {
q <<= 1;
InStorageType t = r << 1;
if (t < yd_mant_in) {
r = t;
} else {
r <<= 1;
if (r >= yd_mant_in) {
q += 1;
r = t - yd_mant_in;
r -= yd_mant_in;
}
}

bool round;
bool sticky;
OutStorageType result;

if (q_exponent > -OutFPBits::EXP_BIAS) {
// Result is normal.

InStorageType round_mask = InStorageType(1) << (Q_EXTRA_FRACTION_LEN - 1);
round = (q & round_mask) != 0;
InStorageType sticky_mask = round_mask - 1;
sticky = (q & sticky_mask) != 0;

result = OutFPBits::create_value(
result_sign,
static_cast<OutStorageType>(q_exponent + OutFPBits::EXP_BIAS),
static_cast<OutStorageType>(q >> Q_EXTRA_FRACTION_LEN))
.uintval();

} else {
// Result is subnormal.
DyadicFloat result(result_sign, result_exp, q);
result.mantissa += r != 0;

// +1 because the leading bit is now part of the fraction.
int extra_fraction_len =
Q_EXTRA_FRACTION_LEN + 1 - q_exponent - OutFPBits::EXP_BIAS;
OutType output = static_cast<OutType>(result);

InStorageType round_mask = InStorageType(1) << (extra_fraction_len - 1);
round = (q & round_mask) != 0;
InStorageType sticky_mask = round_mask - 1;
sticky = (q & sticky_mask) != 0;

result = OutFPBits::create_value(
result_sign, 0,
static_cast<OutStorageType>(q >> extra_fraction_len))
.uintval();
}

if (round || sticky)
raise_except_if_required(FE_INEXACT);

bool lsb = (result & 1) != 0;

switch (quick_get_round()) {
case FE_TONEAREST:
if (round && (lsb || sticky))
++result;
break;
case FE_UPWARD:
++result;
break;
default:
break;
}
if (test_except(FE_OVERFLOW | FE_UNDERFLOW) != 0)
overmighty marked this conversation as resolved.
Show resolved Hide resolved
set_errno_if_required(ERANGE);

return cpp::bit_cast<OutType>(result);
return output;
}

} // namespace LIBC_NAMESPACE::fputil::generic
Expand Down
Loading