Skip to content

Commit

Permalink
[builtins] Support building the 128-bit float functions on ld80 platf…
Browse files Browse the repository at this point in the history
…orms (#68132)

GCC provides these functions (e.g. __addtf3, etc.) in libgcc on x86_64.
Since Clang supports float128, we can also enable the existing code by
using float128 for fp_t if either __FLOAT128__ or __SIZEOF_FLOAT128__ is
defined instead of only supporting these builtins for platforms with
128-bit IEEE long doubles.
This commit defines a new tf_float typedef that matches a float with
__attribute__((mode(TF))) on each given architecture.

There are more tests that could be enabled for x86, but to keep the diff
smaller, I restricted test changes to ones that started failing as part
of this refactoring.

This change has been tested on x86 (natively) and
aarch64, powerpc64, riscv64 and sparc64 via qemu-user.

This supersedes https://reviews.llvm.org/D98261 and should also cover
the changes from #68041.
  • Loading branch information
arichardson authored Oct 24, 2023
1 parent 16fe53c commit d2ce3e9
Show file tree
Hide file tree
Showing 33 changed files with 1,060 additions and 954 deletions.
2 changes: 0 additions & 2 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ set(BF16_SOURCES
truncsfbf2.c
)

# TODO: Several "tf" files (and divtc3.c, but not multc3.c) are in
# GENERIC_SOURCES instead of here.
set(GENERIC_TF_SOURCES
addtf3.c
comparetf2.c
Expand Down
21 changes: 12 additions & 9 deletions compiler-rt/lib/builtins/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,49 +137,54 @@ si_int __ucmpti2(tu_int a, tu_int b);
di_int __fixsfdi( float a);
di_int __fixdfdi( double a);
di_int __fixxfdi(long double a);
di_int __fixtfdi( tf_float a);

ti_int __fixsfti( float a);
ti_int __fixdfti( double a);
ti_int __fixxfti(long double a);
uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation
ti_int __fixtfti( tf_float a);

su_int __fixunssfsi( float a);
su_int __fixunsdfsi( double a);
su_int __fixunsxfsi(long double a);
su_int __fixunstfsi( tf_float a);

du_int __fixunssfdi( float a);
du_int __fixunsdfdi( double a);
du_int __fixunsxfdi(long double a);
du_int __fixunstfdi( tf_float a);

tu_int __fixunssfti( float a);
tu_int __fixunsdfti( double a);
tu_int __fixunsxfti(long double a);
uint64_t __fixunstfdi(long double input); // ppc only
tu_int __fixunstfti( tf_float a);

float __floatdisf(di_int a);
double __floatdidf(di_int a);
long double __floatdixf(di_int a);
long double __floatditf(int64_t a); // ppc only
tf_float __floatditf(di_int a);

float __floattisf(ti_int a);
double __floattidf(ti_int a);
long double __floattixf(ti_int a);
tf_float __floattitf(ti_int a);

float __floatundisf(du_int a);
double __floatundidf(du_int a);
long double __floatundixf(du_int a);
long double __floatunditf(uint64_t a); // ppc only
tf_float __floatunditf(du_int a);

float __floatuntisf(tu_int a);
double __floatuntidf(tu_int a);
long double __floatuntixf(tu_int a);
tf_float __floatuntitf(tu_int a);

// Floating point raised to integer power

float __powisf2( float a, int b); // a ^ b
double __powidf2( double a, int b); // a ^ b
long double __powixf2(long double a, int b); // a ^ b
long double __powitf2(long double a, int b); // ppc only, a ^ b
tf_float __powitf2( tf_float a, int b); // a ^ b

// Complex arithmetic

Expand All @@ -189,17 +194,15 @@ long double __powitf2(long double a, int b); // ppc only, a ^ b
double _Complex __muldc3(double a, double b, double c, double d);
long double _Complex __mulxc3(long double a, long double b,
long double c, long double d);
long double _Complex __multc3(long double a, long double b,
long double c, long double d); // ppc only
tf_float _Complex __multc3(tf_float a, tf_float b, tf_float c, tf_float d);

// (a + ib) / (c + id)

float _Complex __divsc3( float a, float b, float c, float d);
double _Complex __divdc3(double a, double b, double c, double d);
long double _Complex __divxc3(long double a, long double b,
long double c, long double d);
long double _Complex __divtc3(long double a, long double b,
long double c, long double d); // ppc only
tf_float _Complex __divtc3(tf_float a, tf_float b, tf_float c, tf_float d);


// Runtime support
Expand Down
51 changes: 26 additions & 25 deletions compiler-rt/lib/builtins/divtc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,45 @@

#define QUAD_PRECISION
#include "fp_lib.h"
#include "int_lib.h"
#include "int_math.h"

#if defined(CRT_HAS_TF_MODE)

// Returns: the quotient of (a + ib) / (c + id)

COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
long double __c, long double __d) {
COMPILER_RT_ABI Qcomplex __divtc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d) {
int __ilogbw = 0;
long double __logbw =
__compiler_rt_logbl(__compiler_rt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
fp_t __logbw = __compiler_rt_logbtf(
__compiler_rt_fmaxtf(crt_fabstf(__c), crt_fabstf(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = __compiler_rt_scalbnl(__c, -__ilogbw);
__d = __compiler_rt_scalbnl(__d, -__ilogbw);
__c = __compiler_rt_scalbntf(__c, -__ilogbw);
__d = __compiler_rt_scalbntf(__d, -__ilogbw);
}
long double __denom = __c * __c + __d * __d;
Lcomplex z;
COMPLEX_REAL(z) =
__compiler_rt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_IMAGINARY(z) =
__compiler_rt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
fp_t __denom = __c * __c + __d * __d;
Qcomplex z;
COMPLEXTF_REAL(z) =
__compiler_rt_scalbntf((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEXTF_IMAGINARY(z) =
__compiler_rt_scalbntf((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEXTF_REAL(z)) && crt_isnan(COMPLEXTF_IMAGINARY(z))) {
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
COMPLEXTF_REAL(z) = crt_copysigntf(CRT_INFINITY, __c) * __a;
COMPLEXTF_IMAGINARY(z) = crt_copysigntf(CRT_INFINITY, __c) * __b;
} else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
crt_isfinite(__d)) {
__a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
__b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
__a = crt_copysigntf(crt_isinf(__a) ? (fp_t)1.0 : (fp_t)0.0, __a);
__b = crt_copysigntf(crt_isinf(__b) ? (fp_t)1.0 : (fp_t)0.0, __b);
COMPLEXTF_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
} else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
crt_isfinite(__b)) {
__c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
__d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
__c = crt_copysigntf(crt_isinf(__c) ? (fp_t)1.0 : (fp_t)0.0, __c);
__d = crt_copysigntf(crt_isinf(__d) ? (fp_t)1.0 : (fp_t)0.0, __d);
COMPLEXTF_REAL(z) = 0.0 * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
}
}
return z;
}

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extenddftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI fp_t __extenddftf2(double a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extendhftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI long double __extendhftf2(_Float16 a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extendhftf2(src_t a) { return __extendXfYf2__(a); }

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extendsftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI fp_t __extendsftf2(float a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }

#endif
9 changes: 5 additions & 4 deletions compiler-rt/lib/builtins/extendxftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
// Assumption: long double is an IEEE 80-bit floating point type padded to 128
// bits.

// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
#define QUAD_PRECISION
#include "fp_lib.h"

#if defined(CRT_HAS_TF_MODE) && __LDBL_MANT_DIG__ == 64 && defined(__x86_64__)
#define SRC_80
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
COMPILER_RT_ABI tf_float __extendxftf2(long double a) {
return __extendXfYf2__(a);
}

Expand Down
8 changes: 1 addition & 7 deletions compiler-rt/lib/builtins/fp_extend.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,7 @@ static const int dstSigFracBits = 52;
static const int dstExpBits = 11;

#elif defined DST_QUAD
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 113
typedef long double dst_t;
#elif defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
typedef __float128 dst_t;
#endif
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
Expand Down
50 changes: 26 additions & 24 deletions compiler-rt/lib/builtins/fp_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,11 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);

#elif defined QUAD_PRECISION
#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__)
// TODO: Availability of the *tf functions should not depend on long double
// being IEEE 128, but instead on being able to use a 128-bit floating-point
// type, which includes __float128.
// Right now this (incorrectly) stops the builtins from being used for x86.
#define CRT_LDBL_128BIT
#define CRT_HAS_TF_MODE
#define TF_C(c) c##L
#if defined(CRT_HAS_TF_MODE)
typedef uint64_t half_rep_t;
typedef __uint128_t rep_t;
typedef __int128_t srep_t;
typedef long double fp_t;
typedef tf_float fp_t;
#define HALF_REP_C UINT64_C
#define REP_C (__uint128_t)
// Note: Since there is no explicit way to tell compiler the constant is a
Expand Down Expand Up @@ -207,13 +200,13 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#undef Word_HiMask
#undef Word_LoMask
#undef Word_FullMask
#endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__
#endif // defined(CRT_HAS_TF_MODE)
#else
#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
#endif

#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \
defined(CRT_LDBL_128BIT)
(defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE))
#define typeWidth (sizeof(rep_t) * CHAR_BIT)
#define exponentBits (typeWidth - significandBits - 1)
#define maxExponent ((1 << exponentBits) - 1)
Expand Down Expand Up @@ -393,31 +386,40 @@ static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
#endif
}

#elif defined(QUAD_PRECISION)

#if defined(CRT_LDBL_128BIT)
static __inline fp_t __compiler_rt_logbl(fp_t x) {
#elif defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE)
// The generic implementation only works for ieee754 floating point. For other
// floating point types, continue to rely on the libm implementation for now.
#if defined(CRT_HAS_IEEE_TF)
static __inline tf_float __compiler_rt_logbtf(tf_float x) {
return __compiler_rt_logbX(x);
}
static __inline fp_t __compiler_rt_scalbnl(fp_t x, int y) {
static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
return __compiler_rt_scalbnX(x, y);
}
static __inline fp_t __compiler_rt_fmaxl(fp_t x, fp_t y) {
static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
return __compiler_rt_fmaxX(x, y);
}
#else
// The generic implementation only works for ieee754 floating point. For other
// floating point types, continue to rely on the libm implementation for now.
static __inline long double __compiler_rt_logbl(long double x) {
#define __compiler_rt_logbl __compiler_rt_logbtf
#define __compiler_rt_scalbnl __compiler_rt_scalbntf
#define __compiler_rt_fmaxl __compiler_rt_fmaxtf
#define crt_fabstf crt_fabsf128
#define crt_copysigntf crt_copysignf128
#elif defined(CRT_LDBL_128BIT)
static __inline tf_float __compiler_rt_logbtf(tf_float x) {
return crt_logbl(x);
}
static __inline long double __compiler_rt_scalbnl(long double x, int y) {
static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
return crt_scalbnl(x, y);
}
static __inline long double __compiler_rt_fmaxl(long double x, long double y) {
static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
return crt_fmaxl(x, y);
}
#endif // CRT_LDBL_128BIT
#define __compiler_rt_logbl crt_logbl
#define __compiler_rt_scalbnl crt_scalbnl
#define __compiler_rt_fmaxl crt_fmaxl
#else
#error Unsupported TF mode type
#endif

#endif // *_PRECISION

Expand Down
8 changes: 1 addition & 7 deletions compiler-rt/lib/builtins/fp_trunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,7 @@ static const int srcSigFracBits = 52;
static const int srcExpBits = 11;

#elif defined SRC_QUAD
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 113
typedef long double src_t;
#elif defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
typedef __float128 src_t;
#endif
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
Expand Down
10 changes: 10 additions & 0 deletions compiler-rt/lib/builtins/int_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@
#define crt_copysign(x, y) __builtin_copysign((x), (y))
#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
#if __has_builtin(__builtin_copysignf128)
#define crt_copysignf128(x, y) __builtin_copysignf128((x), (y))
#elif __has_builtin(__builtin_copysignq) || (defined(__GNUC__) && __GNUC__ >= 7)
#define crt_copysignf128(x, y) __builtin_copysignq((x), (y))
#endif
#endif

#if defined(_MSC_VER) && !defined(__clang__)
Expand All @@ -75,6 +80,11 @@
#define crt_fabs(x) __builtin_fabs((x))
#define crt_fabsf(x) __builtin_fabsf((x))
#define crt_fabsl(x) __builtin_fabsl((x))
#if __has_builtin(__builtin_fabsf128)
#define crt_fabsf128(x) __builtin_fabsf128((x))
#elif __has_builtin(__builtin_fabsq) || (defined(__GNUC__) && __GNUC__ >= 7)
#define crt_fabsf128(x) __builtin_fabsq((x))
#endif
#endif

#if defined(_MSC_VER) && !defined(__clang__)
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/int_to_fp.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ enum {
};

#elif defined DST_QUAD
typedef long double dst_t;
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)

Expand Down
Loading

0 comments on commit d2ce3e9

Please sign in to comment.