From 2dd30f42e4cbb68fad71bcd97569c13bc2ab3d4a Mon Sep 17 00:00:00 2001
From: Chris Elrod <elrodc@gmail.com>
Date: Sun, 10 Jan 2021 07:43:55 -0500
Subject: [PATCH] Add a few precompiles and remove some unnecessary definitions

---
 src/SLEEFPirates.jl | 19 ++++++-------------
 src/double.jl       |  4 ----
 src/precompile.jl   |  6 ++++++
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/SLEEFPirates.jl b/src/SLEEFPirates.jl
index 7a4cc15..c68f216 100644
--- a/src/SLEEFPirates.jl
+++ b/src/SLEEFPirates.jl
@@ -127,8 +127,6 @@ include("misc.jl")   # miscellaneous math functions including pow and cbrt
 # include("sleef.jl")
 # include("xsimd.jl")
 
-@inline Base.exp(x::Vec) = exp(x)
-
 # fallback definitions
 
 for func in (:sin, :cos, :tan, :asin, :acos, :atan, :sinh, :cosh, :tanh,
@@ -184,18 +182,13 @@ for func in (:atan, :hypot, :pow)
 end
 ldexp(x::Float16, q::Int) = Float16(ldexpk(Float32(x), q))
 
-@inline logit(x) = log(Base.FastMath.div_fast(x,Base.FastMath.sub_fast(1,x)))
-@inline logit(x::AbstractSIMD{W,T}) where {W,T} = log(x / (vbroadcast(Val{W}(),one(T)) - x))
-@inline invlogit(x) = Base.FastMath.inv_fast(Base.FastMath.add_fast(1, exp(Base.FastMath.sub_fast(x))))
-@inline invlogit(x::AbstractSIMD{W,T}) where {W,T} = (o = vbroadcast(Val{W}(),one(T)); (o / (o + exp(-x))))
-@inline nlogit(x) = log(Base.FastMath.div_fast(Base.FastMath.sub_fast(1,x), x))
-@inline nlogit(x::AbstractSIMD{W,T}) where {W,T} = log((vbroadcast(Val{W}(),one(T)) - x) / x)
-@inline ninvlogit(x) = Base.FastMath.inv_fast(Base.FastMath.add_fast(1, exp(x)))
-@inline ninvlogit(x::AbstractSIMD{W,T}) where {W,T} = inv(vbroadcast(Val{W}(), one(T)) + exp(x))
-@inline log1m(x) = Base.log1p(Base.FastMath.sub_fast(x))
-@inline log1m(v::AbstractSIMD{W,T}) where {W,T} = log1p(-v)
+@inline logit(x) = log(Base.FastMath.div_fast(x,Base.FastMath.sub_fast(one(x),x)))
+@inline invlogit(x) = Base.FastMath.inv_fast(Base.FastMath.add_fast(one(x), exp(Base.FastMath.sub_fast(x))))
+@inline nlogit(x) = log(Base.FastMath.div_fast(Base.FastMath.sub_fast(one(x),x), x))
+@inline ninvlogit(x) = Base.FastMath.inv_fast(Base.FastMath.add_fast(one(x), exp(x)))
+@inline log1m(x) = log1p(Base.FastMath.sub_fast(x))
 @inline function tanh_fast(x)
-    exp2xm1 = expm1(x + x)
+    exp2xm1 = expm1(Base.FastMath.add_fast(x, x))
     exp2xm1 / (exp2xm1 + typeof(x)(2))
 end
 
diff --git a/src/double.jl b/src/double.jl
index 1c09441..b54172b 100644
--- a/src/double.jl
+++ b/src/double.jl
@@ -10,10 +10,6 @@ end
 @inline function Double(x::Vec, y::Vec)
     Double(Vec(data(x)), Vec(data(y)))
 end
-@inline promote_vtype(::Type{Mask{W,U}}, ::Type{Double{V}}) where {W, U, T, V <: AbstractSIMD{W,T}} = Double{V}
-@inline promote_vtype(::Type{Double{V}}, ::Type{Mask{W,U}}) where {W, U, T, V <: AbstractSIMD{W,T}} = Double{V}
-@inline promote_vtype(::Type{Mask{W,U}}, ::Type{Double{T}}) where {W, U, T <: Number} = Double{Vec{W,T}}
-@inline promote_vtype(::Type{Double{T}}, ::Type{Mask{W,U}}) where {W, U, T <: Number} = Double{Vec{W,T}}
 @inline Base.convert(::Type{Double{V}}, v::Vec) where {W,T,V <: AbstractSIMD{W,T}} = Double(convert(V, v), vzero(V))
 @inline Base.convert(::Type{Double{V}}, v::V) where {V <: AbstractSIMD} = Double(v, vzero(V))
 # @inline Base.convert(::Type{Double{V}}, m::Mask) where {V} = m
diff --git a/src/precompile.jl b/src/precompile.jl
index 22c11ca..1b64e35 100644
--- a/src/precompile.jl
+++ b/src/precompile.jl
@@ -1,3 +1,9 @@
 function _precompile_()
     ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
+    for T ∈ (Float32,Float64)
+        W = VectorizationBase.pick_vector_width(T)
+        for f ∈ (log, exp, sin, cos, tan, tanh, tanh_fast)
+            precompile(f, (Vec{W, T}, ))
+        end
+    end
 end