JuliaStats · rofinn · Jun 27, 2019 · Jul 17, 2018 · May 28, 2019 · May 29, 2019
diff --git a/Project.toml b/Project.toml
@@ -13,8 +13,9 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [extras]
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DelimitedFiles", "Test"]
+test = ["Dates", "DelimitedFiles", "Test"]
diff --git a/docs/src/weights.md b/docs/src/weights.md
@@ -50,6 +50,72 @@ w = Weights([1., 2., 3.])
 w = weights([1., 2., 3.])
 ```
 
+### Exponential weights: `eweights`
+
+Exponential weights are a common form of temporal weights which assign exponentially decreasing
+weights to past observations.
+
+If `t` is a vector of temporal indices then for each index `i` we compute the weight as:
+
+``λ (1 - λ)^{1 - i}``
+
+``λ`` is a smoothing factor or rate parameter such that ``0 < λ ≤ 1``.
+As this value approaches 0, the resulting weights will be almost equal,
+while values closer to 1 will put greater weight on the tail elements of the vector.
+
+# Examples
+
+```julia-repl
+julia> eweights(1:10, 0.3)
+10-element Weights{Float64,Float64,Array{Float64,1}}:
+ 0.3
+ 0.42857142857142855
+ 0.6122448979591837
+ 0.8746355685131197
+ 1.249479383590171
+ 1.7849705479859588
+ 2.549957925694227
+ 3.642797036706039
+ 5.203995766722913
+ 7.434279666747019
+```
+
+Simply passing the number of observations `n` is equivalent to passing in `1:n`.
+
+```julia-repl
+julia> eweights(10, 0.3)
+10-element Weights{Float64,Float64,Array{Float64,1}}:
+ 0.3
+ 0.42857142857142855
+ 0.6122448979591837
+ 0.8746355685131197
+ 1.249479383590171
+ 1.7849705479859588
+ 2.549957925694227
+ 3.642797036706039
+ 5.203995766722913
+ 7.434279666747019
+```
+
+Finally, you can construct exponential weights from an arbitrary subset of timestamps within a larger range.
+
+```julia-repl
+julia> t
+2019-01-01T01:00:00:2 hours:2019-01-01T05:00:00
+
+julia> r
+2019-01-01T01:00:00:1 hour:2019-01-02T01:00:00
+
+julia> eweights(t, r, 0.3)
+3-element Weights{Float64,Float64,Array{Float64,1}}:
+ 0.3
+ 0.6122448979591837
+ 1.249479383590171
+```
+
+NOTE: This is equivalent to `eweights(something.(indexin(t, r)), 0.3)`, which is saying that for each value in `t` return the corresponding index for that value in `r`.
+Since `indexin` returns `nothing` if there is no corresponding value from `t` in `r` we use `something` to eliminate that possibility.
+
 ## Methods
 
 `AbstractWeights` implements the following methods:
@@ -70,5 +136,6 @@ Weights
 aweights
 fweights
 pweights
+eweights
 weights
-```
+```
diff --git a/src/StatsBase.jl b/src/StatsBase.jl
@@ -34,6 +34,7 @@ export
     aweights,           # construct an AnalyticWeights vector
     fweights,           # construct a FrequencyWeights vector
     pweights,           # construct a ProbabilityWeights vector
+    eweights,           # construct an exponential Weights vector
     wsum,               # weighted sum with vector as second argument
     wsum!,              # weighted sum across dimensions with provided storage
     wmean,              # weighted mean

diff --git a/src/weights.jl b/src/weights.jl
@@ -193,6 +193,63 @@ pweights(vs::RealArray) = ProbabilityWeights(vec(vs))
     end
 end
 
+"""
+    eweights(t::AbstractVector{<:Integer}, λ::Real)
+    eweights(t::AbstractVector{T}, r::StepRange{T}, λ::Real) where T
+    eweights(n::Integer, λ::Real)
+
+Construct a [`Weights`](@ref) vector which assigns exponentially decreasing weights to past
+observations, which in this case corresponds to larger integer values `i` in `t`.
+If an integer `n` is provided, weights are generated for values from 1 to `n`
+(equivalent to `t = 1:n`).
+
+For each element `i` in `t` the weight value is computed as:
+
+``λ (1 - λ)^{1 - i}``
+
+# Arguments
+
+- `t::AbstractVector`: temporal indices or timestamps
+- `r::StepRange`: a larger range to use when constructing weights from a subset of timestamps
+- `n::Integer`: if provided instead of `t`, temporal indices are taken to be `1:n`
+- `λ::Real`: a smoothing factor or rate parameter such that ``0 < λ ≤ 1``.
+  As this value approaches 0, the resulting weights will be almost equal,
+  while values closer to 1 will put greater weight on the tail elements of the vector.
+
+# Examples
+```julia-repl
+julia> eweights(1:10, 0.3)
+10-element Weights{Float64,Float64,Array{Float64,1}}:
+ 0.3
+ 0.42857142857142855
+ 0.6122448979591837
+ 0.8746355685131197
+ 1.249479383590171
+ 1.7849705479859588
+ 2.549957925694227
+ 3.642797036706039
+ 5.203995766722913
+ 7.434279666747019
+```
+"""
+function eweights(t::AbstractVector{T}, λ::Real) where T<:Integer
+    0 < λ <= 1 || throw(ArgumentError("Smoothing factor must be between 0 and 1"))
+
+    w0 = map(t) do i
+        i > 0 || throw(ArgumentError("Time indices must be non-zero positive integers"))
+        λ * (1 - λ)^(1 - i)
+    end
+
+    s = sum(w0)
+    Weights(w0, s)
+end
+
+eweights(n::Integer, λ::Real) = eweights(1:n, λ)
+eweights(t::AbstractVector, r::AbstractRange, λ::Real) =
+    eweights(something.(indexin(t, r)), λ)
+
+# NOTE: No variance correction is implemented for exponential weights
+
 ##### Equality tests #####
 
 for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights)
@@ -481,7 +538,7 @@ _mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T,W} =
 Compute the weighted quantiles of a vector `v` at a specified set of probability
 values `p`, using weights given by a weight vector `w` (of type `AbstractWeights`).
 Weights must not be negative. The weights and data vectors must have the same length.
-`NaN` is returned if `x` contains any `NaN` values. An error is raised if `w` contains 
+`NaN` is returned if `x` contains any `NaN` values. An error is raised if `w` contains
 any `NaN` values.
 
 With [`FrequencyWeights`](@ref), the function returns the same result as
@@ -502,15 +559,15 @@ function quantile(v::RealVector{V}, w::AbstractWeights{W}, p::RealVector) where
     all(x -> 0 <= x <= 1, p) || throw(ArgumentError("input probability out of [0,1] range"))
 
     w.sum == 0 && throw(ArgumentError("weight vector cannot sum to zero"))
-    length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," * 
+    length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," *
         "got $(length(v)) and $(length(w))"))
     for x in w.values
         isnan(x) && throw(ArgumentError("weight vector cannot contain NaN entries"))
         x < 0 && throw(ArgumentError("weight vector cannot contain negative entries"))
     end
 
-    isa(w, FrequencyWeights) && !(eltype(w) <: Integer) && any(!isinteger, w) && 
-        throw(ArgumentError("The values of the vector of `FrequencyWeights` must be numerically" * 
+    isa(w, FrequencyWeights) && !(eltype(w) <: Integer) && any(!isinteger, w) &&
+        throw(ArgumentError("The values of the vector of `FrequencyWeights` must be numerically" *
                             "equal to integers. Use `ProbabilityWeights` or `AnalyticWeights` instead."))
 
     # remove zeros weights and sort

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,4 +1,5 @@
 using StatsBase
+using Dates
 using LinearAlgebra
 using Random
 using Statistics

diff --git a/test/weights.jl b/test/weights.jl
@@ -447,4 +447,55 @@ end
     @test round(mean(Union{Int,Missing}[1,2], weights([1,2])), digits=3) ≈ 1.667
 end
 
+@testset "Exponential Weights" begin
+    @testset "Usage" begin
+        θ = 5.25
+        λ = 1 - exp(-1 / θ)     # simple conversion for the more common/readable method
+        v = [λ*(1-λ)^(1-i) for i = 1:4]
+        w = Weights(v)
+
+        @test round.(w, digits=4) == [0.1734, 0.2098, 0.2539, 0.3071]
+
+        @testset "basic" begin
+            @test eweights(1:4, λ) ≈ w
+        end
+
+        @testset "1:n" begin
+            @test eweights(4, λ) ≈ w
+        end
+
+        @testset "indexin" begin
+            v = [λ*(1-λ)^(1-i) for i = 1:10]
+
+            # Test that we should be able to skip indices easily
+            @test eweights([1, 3, 5, 7], 1:10, λ) ≈ Weights(v[[1, 3, 5, 7]])
+
+            # This should also work with actual time types
+            t1 = DateTime(2019, 1, 1, 1)
+            tx = t1 + Hour(7)
+            tn = DateTime(2019, 1, 2, 1)
+
+            @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ) ≈ Weights(v[[1, 3, 5, 7]])
+        end
+    end
+
+    @testset "Empty" begin
+        @test eweights(0, 0.3) == Weights(Float64[])
+        @test eweights(1:0, 0.3) == Weights(Float64[])
+        @test eweights(Int[], 1:10, 0.4) == Weights(Float64[])
+    end
+
+    @testset "Failure Conditions" begin
+        # λ > 1.0
+        @test_throws ArgumentError eweights(1, 1.1)
+
+        # time indices are not all positive non-zero integers
+        @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3)
+
+        # Passing in an array of bools will work because Bool <: Integer,
+        # but any `false` values will trigger the same argument error as 0.0
+        @test_throws ArgumentError eweights([true, false, true, true], 0.3)
+    end
+end
+
 end # @testset StatsBase.Weights