From 300e34cf741d37b4a607d20fb8c6cd8db435d249 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Thu, 29 Oct 2020 21:28:41 -0500 Subject: [PATCH] Faster sinh and cosh for Float32, Float64 (#37426) * Faster sinh and cosh for Float32, Float64 Use minimax polynomials to reduce the number of cases needed. * Fix COSH_SMALL_X * I should probably give the functions the correct names. * Make Float32 path for sinh use Float64 for polynomial faster, better accuracy, and doesn't need FMA! * fix doctest? * Add a tiny bit of bonus accuracy for Float64 to re-trigger build * turns out decimal places matter --- base/special/hyperbolic.jl | 159 +++++++++--------- .../manual/complex-and-rational-numbers.md | 2 +- 2 files changed, 79 insertions(+), 82 deletions(-) diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl index 4b0e994b7e610..1914c192df35a 100644 --- a/base/special/hyperbolic.jl +++ b/base/special/hyperbolic.jl @@ -14,6 +14,20 @@ # is preserved. # ==================================================== +@inline function exthorner(x, p::Tuple) + # polynomial evaluation using compensated summation. + # much more accurate, especially when lo can be combined with other rounding errors + hi, lo = p[end], zero(x) + for i in length(p)-1:-1:1 + pi = p[i] + prod = hi*x + err = fma(hi, x, -prod) + hi = pi+prod + lo = fma(lo, x, prod - (hi - pi) + err) + end + return hi, lo +end + # Hyperbolic functions # sinh methods H_SMALL_X(::Type{Float64}) = 2.0^-28 @@ -27,108 +41,91 @@ H_OVERFLOW_X(::Type{Float64}) = 710.475860073944 # nextfloat(710.4758600739439) H_LARGE_X(::Type{Float32}) = 88.72283f0 H_OVERFLOW_X(::Type{Float32}) = 89.415985f0 -function sinh(x::T) where T <: Union{Float32, Float64} + +SINH_SMALL_X(::Type{Float64}) = 2.1 +SINH_SMALL_X(::Type{Float32}) = 3.0f0 + +# For Float64, use DoubleFloat scheme for extra accuracy +function sinh_kernel(x::Float64) + x2 = x*x + x2lo = fma(x,x,-x2) + hi_order = evalpoly(x2, (8.333333333336817e-3, 1.9841269840165435e-4, + 2.7557319381151335e-6, 2.5052096530035283e-8, + 1.6059550718903307e-10, 7.634842144412119e-13, + 2.9696954760355812e-15)) + hi,lo = exthorner(x2, (1.0, 0.16666666666666635, hi_order)) + return muladd(x, hi, muladd(x, lo, x*x2lo*0.16666666666666635)) +end +# For Float32, using Float64 is simpler, faster, and doesn't require FMA +function sinh_kernel(x::Float32) + x=Float64(x) + res = evalpoly(x*x, (1.0, 0.1666666779967941, 0.008333336726447933, + 0.00019841001151414065, 2.7555538207080807e-6, + 2.5143389765825282e-8, 1.6260094552031644e-10)) + return Float32(res*x) +end + + +function sinh(x::T) where T<:Union{Float32,Float64} # Method # mathematically sinh(x) is defined to be (exp(x)-exp(-x))/2 - # 1. Replace x by |x| (sinh(-x) = -sinh(x)). + # 1. Sometimes replace x by |x| (sinh(-x) = -sinh(x)). # 2. Find the branch and the expression to calculate and return it - # a) 0 <= x < H_SMALL_X - # return x - # b) H_SMALL_X <= x < H_MEDIUM_X - # return sinh(x) = (E + E/(E+1))/2, where E=expm1(x) - # c) H_MEDIUM_X <= x < H_LARGE_X - # return sinh(x) = exp(x)/2 + # a) 0 <= x < SINH_SMALL_X + # approximate sinh(x) with a minimax polynomial + # b) SINH_SMALL_X <= x < H_LARGE_X + # return sinh(x) = (exp(x) - exp(-x))/2 # d) H_LARGE_X <= x < H_OVERFLOW_X # return sinh(x) = exp(x/2)/2 * exp(x/2) - # e) H_OVERFLOW_X <= x - # return sinh(x) = T(Inf) - # - # Notes: - # only sinh(0) = 0 is exact for finite x. - - isnan(x) && return x + # Note that this branch automatically deals with Infs and NaNs absx = abs(x) - - h = T(0.5) - if x < 0 - h = -h - end - # in a) or b) - if absx < H_MEDIUM_X(T) - # in a) - if absx < H_SMALL_X(T) - return x - end - t = expm1(absx) - if absx < T(1) - return h*(T(2)*t - t*t/(t + T(1))) - end - return h*(t + t/(t + T(1))) - end - # in c) - if absx < H_LARGE_X(T) - return h*exp(absx) - end - # in d) - if absx < H_OVERFLOW_X(T) - return h*T(2)*_ldexp_exp(absx, Int32(-1)) + if absx <= SINH_SMALL_X(T) + return sinh_kernel(x) + elseif absx >= H_LARGE_X(T) + E = exp(T(.5)*absx) + return copysign(T(.5)*E*E, x) end - # in e) - return copysign(T(Inf), x) + E = exp(absx) + return copysign(T(.5)*(E - 1/E),x) end sinh(x::Real) = sinh(float(x)) -# cosh methods -COSH_SMALL_X(::Type{Float32}) = 0.00024414062f0 -COSH_SMALL_X(::Type{Float64}) = 2.7755602085408512e-17 -function cosh(x::T) where T <: Union{Float32, Float64} +COSH_SMALL_X(::Type{T}) where T= one(T) + +function cosh_kernel(x2::Float32) + return evalpoly(x2, (1.0f0, 0.49999997f0, 0.041666888f0, 0.0013882756f0, 2.549933f-5)) +end + +function cosh_kernel(x2::Float64) + return evalpoly(x2, (1.0, 0.5000000000000002, 0.04166666666666269, + 1.3888888889206764e-3, 2.4801587176784207e-5, + 2.7557345825742837e-7, 2.0873617441235094e-9, + 1.1663435515945578e-11)) +end + +function cosh(x::T) where T<:Union{Float32,Float64} # Method # mathematically cosh(x) is defined to be (exp(x)+exp(-x))/2 # 1. Replace x by |x| (cosh(x) = cosh(-x)). # 2. Find the branch and the expression to calculate and return it # a) x <= COSH_SMALL_X - # return T(1) - # b) COSH_SMALL_X <= x <= ln2/2 - # return 1+expm1(|x|)^2/(2*exp(|x|)) - # c) ln2/2 <= x <= H_MEDIUM_X - # return (exp(|x|)+1/exp(|x|)/2 - # d) H_MEDIUM_X <= x < H_LARGE_X - # return cosh(x) = exp(x)/2 + # approximate sinh(x) with a minimax polynomial + # b) COSH_SMALL_X <= x < H_LARGE_X + # return cosh(x) = = (exp(x) + exp(-x))/2 # e) H_LARGE_X <= x < H_OVERFLOW_X # return cosh(x) = exp(x/2)/2 * exp(x/2) - # f) H_OVERFLOW_X <= x - # return cosh(x) = T(Inf) - - isnan(x) && return x + # Note that this branch automatically deals with Infs and NaNs absx = abs(x) - h = T(0.5) - # in a) or b) - if absx < log(T(2))/2 - # in a) - if absx < COSH_SMALL_X(T) - return T(1) - end - t = expm1(absx) - w = T(1) + t - return T(1) + (t*t)/(w + w) - end - # in c) - if absx < H_MEDIUM_X(T) - t = exp(absx) - return h*t + h/t - end - # in d) - if absx < H_LARGE_X(T) - return h*exp(absx) - end - # in e) - if absx < H_OVERFLOW_X(T) - return _ldexp_exp(absx, Int32(-1)) + if absx <= COSH_SMALL_X(T) + return cosh_kernel(x*x) + elseif absx >= H_LARGE_X(T) + E = exp(T(.5)*absx) + return T(.5)*E*E end - # in f) - return T(Inf) + E = exp(absx) + return T(.5)*(E + 1/E) end cosh(x::Real) = cosh(float(x)) diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md index faf8ffcf8c198..99e4a677e2724 100644 --- a/doc/src/manual/complex-and-rational-numbers.md +++ b/doc/src/manual/complex-and-rational-numbers.md @@ -124,7 +124,7 @@ julia> sqrt(1 + 2im) 1.272019649514069 + 0.7861513777574233im julia> cos(1 + 2im) -2.0327230070196656 - 3.0518977991518im +2.0327230070196656 - 3.0518977991517997im julia> exp(1 + 2im) -1.1312043837568135 + 2.4717266720048188im