[builtins] Support building the 128-bit float functions on ld80 platf…

…orms (#68132) GCC provides these functions (e.g. __addtf3, etc.) in libgcc on x86_64. Since Clang supports __float128, we can also enable the existing code by using __float128 for fp_t if either __FLOAT128__ or __SIZEOF_FLOAT128__ is defined instead of only supporting these builtins for platforms with 128-bit IEEE long doubles. This commit defines a new tf_float typedef that matches a float with __attribute__((mode(TF))) on each given architecture. There are more tests that could be enabled for x86, but to keep the diff smaller, I restricted test changes to ones that started failing as part of this refactoring. This change has been tested on x86 (natively) and aarch64, powerpc64, riscv64 and sparc64 via qemu-user. This supersedes https://reviews.llvm.org/D98261 and should also cover the changes from #68041.
llvm · Oct 24, 2023 · d2ce3e9 · d2ce3e9
1 parent 16fe53c
commit d2ce3e9
Show file tree

Hide file tree

Showing 33 changed files with 1,060 additions and 954 deletions.
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -187,8 +187,6 @@ set(BF16_SOURCES
     truncsfbf2.c
 )
 
-# TODO: Several "tf" files (and divtc3.c, but not multc3.c) are in
-# GENERIC_SOURCES instead of here.
 set(GENERIC_TF_SOURCES
   addtf3.c
   comparetf2.c

diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
@@ -137,49 +137,54 @@ si_int __ucmpti2(tu_int a, tu_int b);
 di_int __fixsfdi(      float a);
 di_int __fixdfdi(     double a);
 di_int __fixxfdi(long double a);
+di_int __fixtfdi(   tf_float a);
 
 ti_int __fixsfti(      float a);
 ti_int __fixdfti(     double a);
 ti_int __fixxfti(long double a);
-uint64_t __fixtfdi(long double input);  // ppc only, doesn't match documentation
+ti_int __fixtfti(   tf_float a);
 
 su_int __fixunssfsi(      float a);
 su_int __fixunsdfsi(     double a);
 su_int __fixunsxfsi(long double a);
+su_int __fixunstfsi(   tf_float a);
 
 du_int __fixunssfdi(      float a);
 du_int __fixunsdfdi(     double a);
 du_int __fixunsxfdi(long double a);
+du_int __fixunstfdi(   tf_float a);
 
 tu_int __fixunssfti(      float a);
 tu_int __fixunsdfti(     double a);
 tu_int __fixunsxfti(long double a);
-uint64_t __fixunstfdi(long double input);  // ppc only
+tu_int __fixunstfti(   tf_float a);
 
 float       __floatdisf(di_int a);
 double      __floatdidf(di_int a);
 long double __floatdixf(di_int a);
-long double __floatditf(int64_t a);        // ppc only
+tf_float    __floatditf(di_int a);
 
 float       __floattisf(ti_int a);
 double      __floattidf(ti_int a);
 long double __floattixf(ti_int a);
+tf_float    __floattitf(ti_int a);
 
 float       __floatundisf(du_int a);
 double      __floatundidf(du_int a);
 long double __floatundixf(du_int a);
-long double __floatunditf(uint64_t a);     // ppc only
+tf_float    __floatunditf(du_int a);
 
 float       __floatuntisf(tu_int a);
 double      __floatuntidf(tu_int a);
 long double __floatuntixf(tu_int a);
+tf_float    __floatuntitf(tu_int a);
 
 //  Floating point raised to integer power
 
 float       __powisf2(      float a, int b);  // a ^ b
 double      __powidf2(     double a, int b);  // a ^ b
 long double __powixf2(long double a, int b);  // a ^ b
-long double __powitf2(long double a, int b);  // ppc only, a ^ b
+tf_float    __powitf2(   tf_float a, int b);  // a ^ b
 
 //  Complex arithmetic
 
@@ -189,17 +194,15 @@ long double __powitf2(long double a, int b);  // ppc only, a ^ b
      double _Complex __muldc3(double a, double b, double c, double d);
 long double _Complex __mulxc3(long double a, long double b,
                               long double c, long double d);
-long double _Complex __multc3(long double a, long double b,
-                              long double c, long double d); // ppc only
+   tf_float _Complex __multc3(tf_float a, tf_float b, tf_float c, tf_float d);
 
 //  (a + ib) / (c + id)
 
       float _Complex __divsc3( float a,  float b,  float c,  float d);
      double _Complex __divdc3(double a, double b, double c, double d);
 long double _Complex __divxc3(long double a, long double b,
                               long double c, long double d);
-long double _Complex __divtc3(long double a, long double b,
-                              long double c, long double d);  // ppc only
+   tf_float _Complex __divtc3(tf_float a, tf_float b, tf_float c, tf_float d);
 
 
 //         Runtime support

diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c
@@ -12,44 +12,45 @@
 
 #define QUAD_PRECISION
 #include "fp_lib.h"
-#include "int_lib.h"
-#include "int_math.h"
+
+#if defined(CRT_HAS_TF_MODE)
 
 // Returns: the quotient of (a + ib) / (c + id)
 
-COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
-                                  long double __c, long double __d) {
+COMPILER_RT_ABI Qcomplex __divtc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d) {
   int __ilogbw = 0;
-  long double __logbw =
-      __compiler_rt_logbl(__compiler_rt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+  fp_t __logbw = __compiler_rt_logbtf(
+      __compiler_rt_fmaxtf(crt_fabstf(__c), crt_fabstf(__d)));
   if (crt_isfinite(__logbw)) {
     __ilogbw = (int)__logbw;
-    __c = __compiler_rt_scalbnl(__c, -__ilogbw);
-    __d = __compiler_rt_scalbnl(__d, -__ilogbw);
+    __c = __compiler_rt_scalbntf(__c, -__ilogbw);
+    __d = __compiler_rt_scalbntf(__d, -__ilogbw);
   }
-  long double __denom = __c * __c + __d * __d;
-  Lcomplex z;
-  COMPLEX_REAL(z) =
-      __compiler_rt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-  COMPLEX_IMAGINARY(z) =
-      __compiler_rt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
+  fp_t __denom = __c * __c + __d * __d;
+  Qcomplex z;
+  COMPLEXTF_REAL(z) =
+      __compiler_rt_scalbntf((__a * __c + __b * __d) / __denom, -__ilogbw);
+  COMPLEXTF_IMAGINARY(z) =
+      __compiler_rt_scalbntf((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (crt_isnan(COMPLEXTF_REAL(z)) && crt_isnan(COMPLEXTF_IMAGINARY(z))) {
     if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
-      COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
-      COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
+      COMPLEXTF_REAL(z) = crt_copysigntf(CRT_INFINITY, __c) * __a;
+      COMPLEXTF_IMAGINARY(z) = crt_copysigntf(CRT_INFINITY, __c) * __b;
     } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
                crt_isfinite(__d)) {
-      __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
-      __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
-      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
-      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+      __a = crt_copysigntf(crt_isinf(__a) ? (fp_t)1.0 : (fp_t)0.0, __a);
+      __b = crt_copysigntf(crt_isinf(__b) ? (fp_t)1.0 : (fp_t)0.0, __b);
+      COMPLEXTF_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+      COMPLEXTF_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
     } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
                crt_isfinite(__b)) {
-      __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
-      __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
-      COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
-      COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+      __c = crt_copysigntf(crt_isinf(__c) ? (fp_t)1.0 : (fp_t)0.0, __c);
+      __d = crt_copysigntf(crt_isinf(__d) ? (fp_t)1.0 : (fp_t)0.0, __d);
+      COMPLEXTF_REAL(z) = 0.0 * (__a * __c + __b * __d);
+      COMPLEXTF_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
     }
   }
   return z;
 }
+
+#endif
diff --git a/compiler-rt/lib/builtins/extenddftf2.c b/compiler-rt/lib/builtins/extenddftf2.c
@@ -14,8 +14,6 @@
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI fp_t __extenddftf2(double a) {
-  return __extendXfYf2__(a);
-}
+COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
 
 #endif
diff --git a/compiler-rt/lib/builtins/extendhftf2.c b/compiler-rt/lib/builtins/extendhftf2.c
@@ -15,8 +15,6 @@
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI long double __extendhftf2(_Float16 a) {
-  return __extendXfYf2__(a);
-}
+COMPILER_RT_ABI dst_t __extendhftf2(src_t a) { return __extendXfYf2__(a); }
 
 #endif
diff --git a/compiler-rt/lib/builtins/extendsftf2.c b/compiler-rt/lib/builtins/extendsftf2.c
@@ -14,8 +14,6 @@
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI fp_t __extendsftf2(float a) {
-  return __extendXfYf2__(a);
-}
+COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
 
 #endif
diff --git a/compiler-rt/lib/builtins/extendxftf2.c b/compiler-rt/lib/builtins/extendxftf2.c
@@ -9,14 +9,15 @@
 // Assumption: long double is a IEEE 80 bit floating point type padded to 128
 // bits.
 
-// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
-#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
-    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) && __LDBL_MANT_DIG__ == 64 && defined(__x86_64__)
 #define SRC_80
 #define DST_QUAD
 #include "fp_extend_impl.inc"
 
-COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
+COMPILER_RT_ABI tf_float __extendxftf2(long double a) {
   return __extendXfYf2__(a);
 }
 

diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h
@@ -102,13 +102,7 @@ static const int dstSigFracBits = 52;
 static const int dstExpBits = 11;
 
 #elif defined DST_QUAD
-// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
-#if __LDBL_MANT_DIG__ == 113
-typedef long double dst_t;
-#elif defined(__x86_64__) &&                                                   \
-    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
-typedef __float128 dst_t;
-#endif
+typedef tf_float dst_t;
 typedef __uint128_t dst_rep_t;
 #define DST_REP_C (__uint128_t)
 static const int dstBits = sizeof(dst_t) * CHAR_BIT;

diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h
@@ -105,18 +105,11 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
 
 #elif defined QUAD_PRECISION
-#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__)
-// TODO: Availability of the *tf functions should not depend on long double
-// being IEEE 128, but instead on being able to use a 128-bit floating-point
-// type, which includes __float128.
-// Right now this (incorrectly) stops the builtins from being used for x86.
-#define CRT_LDBL_128BIT
-#define CRT_HAS_TF_MODE
-#define TF_C(c) c##L
+#if defined(CRT_HAS_TF_MODE)
 typedef uint64_t half_rep_t;
 typedef __uint128_t rep_t;
 typedef __int128_t srep_t;
-typedef long double fp_t;
+typedef tf_float fp_t;
 #define HALF_REP_C UINT64_C
 #define REP_C (__uint128_t)
 // Note: Since there is no explicit way to tell compiler the constant is a
@@ -207,13 +200,13 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 #undef Word_HiMask
 #undef Word_LoMask
 #undef Word_FullMask
-#endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__
+#endif // defined(CRT_HAS_TF_MODE)
 #else
 #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
 #endif
 
 #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) ||                  \
-    defined(CRT_LDBL_128BIT)
+    (defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE))
 #define typeWidth (sizeof(rep_t) * CHAR_BIT)
 #define exponentBits (typeWidth - significandBits - 1)
 #define maxExponent ((1 << exponentBits) - 1)
@@ -393,31 +386,40 @@ static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
 #endif
 }
 
-#elif defined(QUAD_PRECISION)
-
-#if defined(CRT_LDBL_128BIT)
-static __inline fp_t __compiler_rt_logbl(fp_t x) {
+#elif defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE)
+// The generic implementation only works for ieee754 floating point (the
+// CRT_HAS_IEEE_TF case). Other tf_float types rely on libm for now.
+#if defined(CRT_HAS_IEEE_TF)
+static __inline tf_float __compiler_rt_logbtf(tf_float x) {
   return __compiler_rt_logbX(x);
 }
-static __inline fp_t __compiler_rt_scalbnl(fp_t x, int y) {
+static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
   return __compiler_rt_scalbnX(x, y);
 }
-static __inline fp_t __compiler_rt_fmaxl(fp_t x, fp_t y) {
+static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
   return __compiler_rt_fmaxX(x, y);
 }
-#else
-// The generic implementation only works for ieee754 floating point. For other
-// floating point types, continue to rely on the libm implementation for now.
-static __inline long double __compiler_rt_logbl(long double x) {
+#define __compiler_rt_logbl __compiler_rt_logbtf
+#define __compiler_rt_scalbnl __compiler_rt_scalbntf
+#define __compiler_rt_fmaxl __compiler_rt_fmaxtf
+#define crt_fabstf crt_fabsf128
+#define crt_copysigntf crt_copysignf128
+#elif defined(CRT_LDBL_128BIT)
+static __inline tf_float __compiler_rt_logbtf(tf_float x) {
   return crt_logbl(x);
 }
-static __inline long double __compiler_rt_scalbnl(long double x, int y) {
+static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
   return crt_scalbnl(x, y);
 }
-static __inline long double __compiler_rt_fmaxl(long double x, long double y) {
+static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
   return crt_fmaxl(x, y);
 }
-#endif // CRT_LDBL_128BIT
+#define __compiler_rt_logbl crt_logbl
+#define __compiler_rt_scalbnl crt_scalbnl
+#define __compiler_rt_fmaxl crt_fmaxl
+#else
+#error Unsupported TF mode type
+#endif
 
 #endif // *_PRECISION
 

diff --git a/compiler-rt/lib/builtins/fp_trunc.h b/compiler-rt/lib/builtins/fp_trunc.h
@@ -36,13 +36,7 @@ static const int srcSigFracBits = 52;
 static const int srcExpBits = 11;
 
 #elif defined SRC_QUAD
-// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
-#if __LDBL_MANT_DIG__ == 113
-typedef long double src_t;
-#elif defined(__x86_64__) &&                                                   \
-    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
-typedef __float128 src_t;
-#endif
+typedef tf_float src_t;
 typedef __uint128_t src_rep_t;
 #define SRC_REP_C (__uint128_t)
 static const int srcBits = sizeof(src_t) * CHAR_BIT;

diff --git a/compiler-rt/lib/builtins/int_math.h b/compiler-rt/lib/builtins/int_math.h
@@ -65,6 +65,11 @@
 #define crt_copysign(x, y) __builtin_copysign((x), (y))
 #define crt_copysignf(x, y) __builtin_copysignf((x), (y))
 #define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+#if __has_builtin(__builtin_copysignf128)
+#define crt_copysignf128(x, y) __builtin_copysignf128((x), (y))
+#elif __has_builtin(__builtin_copysignq) || (defined(__GNUC__) && __GNUC__ >= 7)
+#define crt_copysignf128(x, y) __builtin_copysignq((x), (y))
+#endif
 #endif
 
 #if defined(_MSC_VER) && !defined(__clang__)
@@ -75,6 +80,11 @@
 #define crt_fabs(x) __builtin_fabs((x))
 #define crt_fabsf(x) __builtin_fabsf((x))
 #define crt_fabsl(x) __builtin_fabsl((x))
+#if __has_builtin(__builtin_fabsf128)
+#define crt_fabsf128(x) __builtin_fabsf128((x))
+#elif __has_builtin(__builtin_fabsq) || (defined(__GNUC__) && __GNUC__ >= 7)
+#define crt_fabsf128(x) __builtin_fabsq((x))
+#endif
 #endif
 
 #if defined(_MSC_VER) && !defined(__clang__)

diff --git a/compiler-rt/lib/builtins/int_to_fp.h b/compiler-rt/lib/builtins/int_to_fp.h
@@ -59,7 +59,7 @@ enum {
 };
 
 #elif defined DST_QUAD
-typedef long double dst_t;
+typedef tf_float dst_t;
 typedef __uint128_t dst_rep_t;
 #define DST_REP_C (__uint128_t)