Faster sinh and cosh for Float32, Float64 (JuliaLang#37426)

* Faster sinh and cosh for Float32, Float64 Use minimax polynomials to reduce the number of cases needed. * Fix COSH_SMALL_X * I should probably give the functions the correct names. * Make Float32 path for sinh use Float64 for polynomial faster, better accuracy, and doesn't need FMA! * fix doctest? * Add a tiny bit of bonus accuracy for Float64 to re-trigger build * turns out decimal places matter
ElOceanografo · Oct 30, 2020 · 300e34c · 300e34c
1 parent 78ade6c
commit 300e34c
Show file tree

Hide file tree

Showing 2 changed files with 79 additions and 82 deletions.
diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl
@@ -14,6 +14,20 @@
 # is preserved.
 # ====================================================
 
+@inline function exthorner(x, p::Tuple)
+	# polynomial evaluation using compensated summation.
+	# much more accurate, especially when lo can be combined with other rounding errors
+    hi, lo = p[end], zero(x)
+    for i in length(p)-1:-1:1
+        pi = p[i]
+        prod = hi*x
+        err = fma(hi, x, -prod)
+        hi = pi+prod
+        lo = fma(lo, x, prod - (hi - pi) + err)
+    end
+    return hi, lo
+end
+
 # Hyperbolic functions
 # sinh methods
 H_SMALL_X(::Type{Float64}) = 2.0^-28
@@ -27,108 +41,91 @@ H_OVERFLOW_X(::Type{Float64}) = 710.475860073944 # nextfloat(710.4758600739439)
 
 H_LARGE_X(::Type{Float32}) = 88.72283f0
 H_OVERFLOW_X(::Type{Float32}) = 89.415985f0
-function sinh(x::T) where T <: Union{Float32, Float64}
+
+SINH_SMALL_X(::Type{Float64}) = 2.1
+SINH_SMALL_X(::Type{Float32}) = 3.0f0
+
+# For Float64, use DoubleFloat scheme for extra accuracy
+function sinh_kernel(x::Float64)
+    x2 = x*x
+    x2lo = fma(x,x,-x2)
+    hi_order = evalpoly(x2, (8.333333333336817e-3, 1.9841269840165435e-4,
+                             2.7557319381151335e-6, 2.5052096530035283e-8,
+                             1.6059550718903307e-10, 7.634842144412119e-13,
+                             2.9696954760355812e-15))
+    hi,lo = exthorner(x2, (1.0, 0.16666666666666635, hi_order))
+    return muladd(x, hi, muladd(x, lo, x*x2lo*0.16666666666666635))
+end
+# For Float32, using Float64 is simpler, faster, and doesn't require FMA
+function sinh_kernel(x::Float32)
+    x=Float64(x)
+    res = evalpoly(x*x, (1.0, 0.1666666779967941, 0.008333336726447933,
+                         0.00019841001151414065, 2.7555538207080807e-6,
+                         2.5143389765825282e-8, 1.6260094552031644e-10))
+    return Float32(res*x)
+end
+
+
+function sinh(x::T) where T<:Union{Float32,Float64}
     # Method
     # mathematically sinh(x) is defined to be (exp(x)-exp(-x))/2
-    #    1. Replace x by |x| (sinh(-x) = -sinh(x)).
+    #    1. Sometimes replace x by |x| (sinh(-x) = -sinh(x)).
     #    2. Find the branch and the expression to calculate and return it
-    #      a)   0 <= x < H_SMALL_X
-    #               return x
-    #      b)   H_SMALL_X <= x < H_MEDIUM_X
-    #               return sinh(x) = (E + E/(E+1))/2, where E=expm1(x)
-    #      c)   H_MEDIUM_X <= x < H_LARGE_X
-    #               return sinh(x) = exp(x)/2
+    #      a)   0 <= x < SINH_SMALL_X
+    #               approximate sinh(x) with a  minimax polynomial
+    #      b)   SINH_SMALL_X <= x < H_LARGE_X
+    #               return sinh(x) = (exp(x) - exp(-x))/2
     #      d)   H_LARGE_X  <= x < H_OVERFLOW_X
     #               return sinh(x) = exp(x/2)/2 * exp(x/2)
-    #      e)   H_OVERFLOW_X <=  x
-    #               return sinh(x) = T(Inf)
-    #
-    # Notes:
-    #    only sinh(0) = 0 is exact for finite x.
-
-    isnan(x) && return x
+    #               Note that this branch automatically deals with Infs and NaNs
 
     absx = abs(x)
-
-    h = T(0.5)
-    if x < 0
-        h = -h
-    end
-    # in a) or b)
-    if absx < H_MEDIUM_X(T)
-        # in a)
-        if absx < H_SMALL_X(T)
-            return x
-        end
-        t = expm1(absx)
-        if absx < T(1)
-            return h*(T(2)*t - t*t/(t + T(1)))
-        end
-        return h*(t + t/(t + T(1)))
-    end
-    # in c)
-    if absx < H_LARGE_X(T)
-        return h*exp(absx)
-    end
-    # in d)
-    if absx < H_OVERFLOW_X(T)
-        return h*T(2)*_ldexp_exp(absx, Int32(-1))
+    if absx <= SINH_SMALL_X(T)
+        return sinh_kernel(x)
+    elseif absx >= H_LARGE_X(T)
+        E = exp(T(.5)*absx)
+        return copysign(T(.5)*E*E, x)
     end
-    # in e)
-    return copysign(T(Inf), x)
+    E = exp(absx)
+    return copysign(T(.5)*(E - 1/E),x)
 end
 sinh(x::Real) = sinh(float(x))
 
-# cosh methods
-COSH_SMALL_X(::Type{Float32}) = 0.00024414062f0
-COSH_SMALL_X(::Type{Float64}) = 2.7755602085408512e-17
-function cosh(x::T) where T <: Union{Float32, Float64}
+COSH_SMALL_X(::Type{T}) where T= one(T)
+
+function cosh_kernel(x2::Float32)
+    return evalpoly(x2, (1.0f0, 0.49999997f0, 0.041666888f0, 0.0013882756f0, 2.549933f-5))
+end
+
+function cosh_kernel(x2::Float64)
+    return evalpoly(x2, (1.0, 0.5000000000000002, 0.04166666666666269,
+                         1.3888888889206764e-3, 2.4801587176784207e-5,
+                         2.7557345825742837e-7, 2.0873617441235094e-9,
+                         1.1663435515945578e-11))
+end
+
+function cosh(x::T) where T<:Union{Float32,Float64}
     # Method
     # mathematically cosh(x) is defined to be (exp(x)+exp(-x))/2
     #    1. Replace x by |x| (cosh(x) = cosh(-x)).
     #    2. Find the branch and the expression to calculate and return it
     #      a)   x <= COSH_SMALL_X
-    #               return T(1)
-    #      b)   COSH_SMALL_X <= x <= ln2/2
-    #               return 1+expm1(|x|)^2/(2*exp(|x|))
-    #      c)   ln2/2 <= x <= H_MEDIUM_X
-    #               return (exp(|x|)+1/exp(|x|)/2
-    #      d)   H_MEDIUM_X <= x < H_LARGE_X
-    #               return cosh(x) = exp(x)/2
+    #               approximate sinh(x) with a minimax polynomial
+    #      b)   COSH_SMALL_X <= x < H_LARGE_X
+    #               return cosh(x) = = (exp(x) + exp(-x))/2
     #      e)   H_LARGE_X  <= x < H_OVERFLOW_X
     #               return cosh(x) = exp(x/2)/2 * exp(x/2)
-    #      f)   H_OVERFLOW_X <=  x
-    #               return cosh(x) = T(Inf)
-
-    isnan(x) && return x
+    #      			Note that this branch automatically deals with Infs and NaNs
 
     absx = abs(x)
-    h = T(0.5)
-    # in a) or b)
-    if absx < log(T(2))/2
-        # in a)
-        if absx < COSH_SMALL_X(T)
-            return T(1)
-        end
-        t = expm1(absx)
-        w = T(1) + t
-        return T(1) + (t*t)/(w + w)
-    end
-    # in c)
-    if absx < H_MEDIUM_X(T)
-        t = exp(absx)
-        return h*t + h/t
-    end
-    # in d)
-    if absx < H_LARGE_X(T)
-        return h*exp(absx)
-    end
-    # in e)
-    if absx < H_OVERFLOW_X(T)
-        return _ldexp_exp(absx, Int32(-1))
+    if absx <= COSH_SMALL_X(T)
+        return cosh_kernel(x*x)
+    elseif absx >= H_LARGE_X(T)
+        E = exp(T(.5)*absx)
+        return T(.5)*E*E
     end
-    # in f)
-    return T(Inf)
+    E = exp(absx)
+    return T(.5)*(E + 1/E)
 end
 cosh(x::Real) = cosh(float(x))
 

diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
@@ -124,7 +124,7 @@ julia> sqrt(1 + 2im)
 1.272019649514069 + 0.7861513777574233im
 
 julia> cos(1 + 2im)
-2.0327230070196656 - 3.0518977991518im
+2.0327230070196656 - 3.0518977991517997im
 
 julia> exp(1 + 2im)
 -1.1312043837568135 + 2.4717266720048188im