Skip to content

Commit

Permalink
Faster sinh and cosh for Float32, Float64 (JuliaLang#37426)
Browse files Browse the repository at this point in the history
* Faster sinh and cosh for Float32, Float64

Use minimax polynomials to reduce the number of cases needed.

* Fix COSH_SMALL_X

* I should probably give the functions the correct names.

* Make Float32 path for sinh use Float64 for polynomial

faster, better accuracy, and doesn't need FMA!

* fix doctest?

* Add a tiny bit of bonus accuracy for Float64 to re-trigger build

* turns out decimal places matter
  • Loading branch information
oscardssmith authored Oct 30, 2020
1 parent 78ade6c commit 300e34c
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 82 deletions.
159 changes: 78 additions & 81 deletions base/special/hyperbolic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@
# is preserved.
# ====================================================

@inline function exthorner(x, p::Tuple)
# polynomial evaluation using compensated summation.
# much more accurate, especially when lo can be combined with other rounding errors
hi, lo = p[end], zero(x)
for i in length(p)-1:-1:1
pi = p[i]
prod = hi*x
err = fma(hi, x, -prod)
hi = pi+prod
lo = fma(lo, x, prod - (hi - pi) + err)
end
return hi, lo
end

# Hyperbolic functions
# sinh methods
H_SMALL_X(::Type{Float64}) = 2.0^-28
Expand All @@ -27,108 +41,91 @@ H_OVERFLOW_X(::Type{Float64}) = 710.475860073944 # nextfloat(710.4758600739439)

H_LARGE_X(::Type{Float32}) = 88.72283f0
H_OVERFLOW_X(::Type{Float32}) = 89.415985f0
function sinh(x::T) where T <: Union{Float32, Float64}

SINH_SMALL_X(::Type{Float64}) = 2.1
SINH_SMALL_X(::Type{Float32}) = 3.0f0

# For Float64, use DoubleFloat scheme for extra accuracy
function sinh_kernel(x::Float64)
x2 = x*x
x2lo = fma(x,x,-x2)
hi_order = evalpoly(x2, (8.333333333336817e-3, 1.9841269840165435e-4,
2.7557319381151335e-6, 2.5052096530035283e-8,
1.6059550718903307e-10, 7.634842144412119e-13,
2.9696954760355812e-15))
hi,lo = exthorner(x2, (1.0, 0.16666666666666635, hi_order))
return muladd(x, hi, muladd(x, lo, x*x2lo*0.16666666666666635))
end
# For Float32, using Float64 is simpler, faster, and doesn't require FMA
function sinh_kernel(x::Float32)
x=Float64(x)
res = evalpoly(x*x, (1.0, 0.1666666779967941, 0.008333336726447933,
0.00019841001151414065, 2.7555538207080807e-6,
2.5143389765825282e-8, 1.6260094552031644e-10))
return Float32(res*x)
end


function sinh(x::T) where T<:Union{Float32,Float64}
# Method
# mathematically sinh(x) is defined to be (exp(x)-exp(-x))/2
# 1. Replace x by |x| (sinh(-x) = -sinh(x)).
# 1. Sometimes replace x by |x| (sinh(-x) = -sinh(x)).
# 2. Find the branch and the expression to calculate and return it
# a) 0 <= x < H_SMALL_X
# return x
# b) H_SMALL_X <= x < H_MEDIUM_X
# return sinh(x) = (E + E/(E+1))/2, where E=expm1(x)
# c) H_MEDIUM_X <= x < H_LARGE_X
# return sinh(x) = exp(x)/2
# a) 0 <= x < SINH_SMALL_X
# approximate sinh(x) with a minimax polynomial
# b) SINH_SMALL_X <= x < H_LARGE_X
# return sinh(x) = (exp(x) - exp(-x))/2
# d) H_LARGE_X <= x < H_OVERFLOW_X
# return sinh(x) = exp(x/2)/2 * exp(x/2)
# e) H_OVERFLOW_X <= x
# return sinh(x) = T(Inf)
#
# Notes:
# only sinh(0) = 0 is exact for finite x.

isnan(x) && return x
# Note that this branch automatically deals with Infs and NaNs

absx = abs(x)

h = T(0.5)
if x < 0
h = -h
end
# in a) or b)
if absx < H_MEDIUM_X(T)
# in a)
if absx < H_SMALL_X(T)
return x
end
t = expm1(absx)
if absx < T(1)
return h*(T(2)*t - t*t/(t + T(1)))
end
return h*(t + t/(t + T(1)))
end
# in c)
if absx < H_LARGE_X(T)
return h*exp(absx)
end
# in d)
if absx < H_OVERFLOW_X(T)
return h*T(2)*_ldexp_exp(absx, Int32(-1))
if absx <= SINH_SMALL_X(T)
return sinh_kernel(x)
elseif absx >= H_LARGE_X(T)
E = exp(T(.5)*absx)
return copysign(T(.5)*E*E, x)
end
# in e)
return copysign(T(Inf), x)
E = exp(absx)
return copysign(T(.5)*(E - 1/E),x)
end
sinh(x::Real) = sinh(float(x))

# cosh methods
COSH_SMALL_X(::Type{Float32}) = 0.00024414062f0
COSH_SMALL_X(::Type{Float64}) = 2.7755602085408512e-17
function cosh(x::T) where T <: Union{Float32, Float64}
COSH_SMALL_X(::Type{T}) where T= one(T)

function cosh_kernel(x2::Float32)
return evalpoly(x2, (1.0f0, 0.49999997f0, 0.041666888f0, 0.0013882756f0, 2.549933f-5))
end

function cosh_kernel(x2::Float64)
return evalpoly(x2, (1.0, 0.5000000000000002, 0.04166666666666269,
1.3888888889206764e-3, 2.4801587176784207e-5,
2.7557345825742837e-7, 2.0873617441235094e-9,
1.1663435515945578e-11))
end

function cosh(x::T) where T<:Union{Float32,Float64}
# Method
# mathematically cosh(x) is defined to be (exp(x)+exp(-x))/2
# 1. Replace x by |x| (cosh(x) = cosh(-x)).
# 2. Find the branch and the expression to calculate and return it
# a) x <= COSH_SMALL_X
# return T(1)
# b) COSH_SMALL_X <= x <= ln2/2
# return 1+expm1(|x|)^2/(2*exp(|x|))
# c) ln2/2 <= x <= H_MEDIUM_X
# return (exp(|x|)+1/exp(|x|)/2
# d) H_MEDIUM_X <= x < H_LARGE_X
# return cosh(x) = exp(x)/2
# approximate sinh(x) with a minimax polynomial
# b) COSH_SMALL_X <= x < H_LARGE_X
# return cosh(x) = = (exp(x) + exp(-x))/2
# e) H_LARGE_X <= x < H_OVERFLOW_X
# return cosh(x) = exp(x/2)/2 * exp(x/2)
# f) H_OVERFLOW_X <= x
# return cosh(x) = T(Inf)

isnan(x) && return x
# Note that this branch automatically deals with Infs and NaNs

absx = abs(x)
h = T(0.5)
# in a) or b)
if absx < log(T(2))/2
# in a)
if absx < COSH_SMALL_X(T)
return T(1)
end
t = expm1(absx)
w = T(1) + t
return T(1) + (t*t)/(w + w)
end
# in c)
if absx < H_MEDIUM_X(T)
t = exp(absx)
return h*t + h/t
end
# in d)
if absx < H_LARGE_X(T)
return h*exp(absx)
end
# in e)
if absx < H_OVERFLOW_X(T)
return _ldexp_exp(absx, Int32(-1))
if absx <= COSH_SMALL_X(T)
return cosh_kernel(x*x)
elseif absx >= H_LARGE_X(T)
E = exp(T(.5)*absx)
return T(.5)*E*E
end
# in f)
return T(Inf)
E = exp(absx)
return T(.5)*(E + 1/E)
end
cosh(x::Real) = cosh(float(x))

Expand Down
2 changes: 1 addition & 1 deletion doc/src/manual/complex-and-rational-numbers.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ julia> sqrt(1 + 2im)
1.272019649514069 + 0.7861513777574233im
julia> cos(1 + 2im)
2.0327230070196656 - 3.0518977991518im
2.0327230070196656 - 3.0518977991517997im
julia> exp(1 + 2im)
-1.1312043837568135 + 2.4717266720048188im
Expand Down

0 comments on commit 300e34c

Please sign in to comment.