From b2c01a5ee93ce44283f31c033a71f3fa9abdd0dc Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Tue, 17 Jul 2018 16:07:04 -0700 Subject: [PATCH 01/13] Add exponential weights Co-authored-by: Alex Arslan --- docs/src/weights.md | 14 +++++++++++- src/StatsBase.jl | 2 ++ src/weights.jl | 52 ++++++++++++++++++++++++++++++++++++++++++++- test/weights.jl | 20 +++++++++++++++++ 4 files changed, 86 insertions(+), 2 deletions(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index e8322bfaa..424d207f9 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -41,6 +41,16 @@ w = ProbabilityWeights([0.2, 0.1, 0.3]) w = pweights([0.2, 0.1, 0.3]) ``` +### `ExponentialWeights` + +Exponential weights are a common form of temporal weights which assign exponentially decreasing +weight to past observations. + +```julia +w = ExponentialWeights([0.1837, 0.2222, 0.2688, 0.3253]) +w = eweights(4, 0.173) # construction based on length and rate parameter +``` + ### `Weights` The `Weights` type describes a generic weights vector which does not support all operations possible for `FrequencyWeights`, `AnalyticWeights` and `ProbabilityWeights`. @@ -66,9 +76,11 @@ The following constructors are provided: AnalyticWeights FrequencyWeights ProbabilityWeights +ExponentialWeights Weights aweights fweights pweights +eweights weights -``` \ No newline at end of file +``` diff --git a/src/StatsBase.jl b/src/StatsBase.jl index af6a47aa8..46cd90aad 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -30,10 +30,12 @@ export AnalyticWeights, # to represent an analytic/precision/reliability weight vector FrequencyWeights, # to representing a frequency/case/repeat weight vector ProbabilityWeights, # to representing a probability/sampling weight vector + ExponentialWeights, # to represent an exponential weight vector weights, # construct a generic Weights vector aweights, # construct an AnalyticWeights vector fweights, # construct a FrequencyWeights vector pweights, # construct a ProbabilityWeights vector + eweights, # construct an ExponentialWeights vector wsum, # weighted sum with vector as second argument wsum!, # weighted sum across dimensions with provided storage wmean, # weighted mean diff --git a/src/weights.jl b/src/weights.jl index 174753370..0e2f6af2e 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -193,9 +193,59 @@ pweights(vs::RealArray) = ProbabilityWeights(vec(vs)) end end +@weights ExponentialWeights + +@doc """ + ExponentialWeights(vs, wsum=sum(vs)) + +Construct an `ExponentialWeights` vector with weight values `vs`. +A precomputed sum may be provided as `wsum`. + +Exponential weights are a common form of temporal weights which assign exponentially +decreasing weight to past observations, which in this case corresponds to the front of +the vector. That is, newer observations are assumed to be at the end. +""" ExponentialWeights + +""" + eweights(n, λ) + +Construct an [`ExponentialWeights`](@ref) vector with length `n`, +where each element in position ``i`` is set to ``λ (1 - λ)^{1 - i}``. + +``λ`` is a smoothing factor or rate parameter such that ``0 < λ \\leq 1``. +As this value approaches 0, the resulting weights will be almost equal, +while values closer to 1 will put greater weight on the tail elements of the vector. + +# Examples + +```julia-repl +julia> eweights(10, 0.3) +10-element ExponentialWeights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.42857142857142855 + 0.6122448979591837 + 0.8746355685131197 + 1.249479383590171 + 1.7849705479859588 + 2.549957925694227 + 3.642797036706039 + 5.203995766722913 + 7.434279666747019 +``` +""" +function eweights(n::Integer, λ::Real) + n > 0 || throw(ArgumentError("cannot construct exponential weights of length < 1")) + 0 < λ <= 1 || throw(ArgumentError("smoothing factor must be between 0 and 1")) + w0 = map(i -> λ * (1 - λ)^(1 - i), 1:n) + s = sum(w0) + ExponentialWeights{typeof(s), eltype(w0), typeof(w0)}(w0, s) +end + +# NOTE: No variance correction is implemented for exponential weights + ##### Equality tests ##### -for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) +for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, ExponentialWeights, Weights) @eval begin Base.isequal(x::$w, y::$w) = isequal(x.sum, y.sum) && isequal(x.values, y.values) Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) diff --git a/test/weights.jl b/test/weights.jl index fa8f40be4..ecd07f90d 100644 --- a/test/weights.jl +++ b/test/weights.jl @@ -2,6 +2,8 @@ using StatsBase using LinearAlgebra, Random, SparseArrays, Test @testset "StatsBase.Weights" begin +# NOTE: Do not add eweights here, as its methods don't match those of the others, so the +# tests below don't make sense for it weight_funcs = (weights, aweights, fweights, pweights) # Construction @@ -447,4 +449,22 @@ end @test round(mean(Union{Int,Missing}[1,2], weights([1,2])), digits=3) ≈ 1.667 end +@testset "ExponentialWeights" begin + @testset "Basic Usage" begin + θ = 5.25 + λ = 1 - exp(-1 / θ) # simple conversion for the more common/readable method + + v = [λ*(1-λ)^(1-i) for i = 1:4] + w = ExponentialWeights(v) + + @test round.(w, digits=4) == [0.1734, 0.2098, 0.2539, 0.3071] + @test eweights(4, λ) ≈ w + end + + @testset "Failure Conditions" begin + @test_throws ArgumentError eweights(0, 0.3) + @test_throws ArgumentError eweights(1, 1.1) + end +end + end # @testset StatsBase.Weights From f701581313d33fd4efa4c10aa576613e550a75f7 Mon Sep 17 00:00:00 2001 From: rofinn Date: Tue, 28 May 2019 16:02:05 -0500 Subject: [PATCH 02/13] Clean eweights code to use the default `Weights` type and support alternate methods. --- Project.toml | 3 +- docs/src/weights.md | 17 +++++----- src/StatsBase.jl | 3 +- src/weights.jl | 80 +++++++++++++++++++++++++++++++-------------- test/runtests.jl | 1 + test/weights.jl | 40 ++++++++++++++++++++--- 6 files changed, 102 insertions(+), 42 deletions(-) diff --git a/Project.toml b/Project.toml index 69967992b..215bf0b43 100644 --- a/Project.toml +++ b/Project.toml @@ -13,8 +13,9 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [extras] +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DelimitedFiles", "Test"] +test = ["Dates", "DelimitedFiles", "Test"] diff --git a/docs/src/weights.md b/docs/src/weights.md index 424d207f9..94f02ebad 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -41,23 +41,22 @@ w = ProbabilityWeights([0.2, 0.1, 0.3]) w = pweights([0.2, 0.1, 0.3]) ``` -### `ExponentialWeights` +### `Weights` -Exponential weights are a common form of temporal weights which assign exponentially decreasing -weight to past observations. +The `Weights` type describes a generic weights vector which does not support all operations possible for `FrequencyWeights`, `AnalyticWeights` and `ProbabilityWeights`. ```julia -w = ExponentialWeights([0.1837, 0.2222, 0.2688, 0.3253]) -w = eweights(4, 0.173) # construction based on length and rate parameter +w = Weights([1., 2., 3.]) +w = weights([1., 2., 3.]) ``` -### `Weights` +### `eweights` -The `Weights` type describes a generic weights vector which does not support all operations possible for `FrequencyWeights`, `AnalyticWeights` and `ProbabilityWeights`. +Exponential weights are a common form of temporal weights which assign exponentially decreasing +weight to past observations. ```julia -w = Weights([1., 2., 3.]) -w = weights([1., 2., 3.]) +w = eweights(4, 0.173) # construction based on length and rate parameter ``` ## Methods diff --git a/src/StatsBase.jl b/src/StatsBase.jl index 46cd90aad..102af4ab9 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -30,12 +30,11 @@ export AnalyticWeights, # to represent an analytic/precision/reliability weight vector FrequencyWeights, # to representing a frequency/case/repeat weight vector ProbabilityWeights, # to representing a probability/sampling weight vector - ExponentialWeights, # to represent an exponential weight vector weights, # construct a generic Weights vector aweights, # construct an AnalyticWeights vector fweights, # construct a FrequencyWeights vector pweights, # construct a ProbabilityWeights vector - eweights, # construct an ExponentialWeights vector + eweights, # construct an exponential Weights vector wsum, # weighted sum with vector as second argument wsum!, # weighted sum across dimensions with provided storage wmean, # weighted mean diff --git a/src/weights.jl b/src/weights.jl index 0e2f6af2e..06b803651 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -193,34 +193,42 @@ pweights(vs::RealArray) = ProbabilityWeights(vec(vs)) end end -@weights ExponentialWeights - -@doc """ - ExponentialWeights(vs, wsum=sum(vs)) - -Construct an `ExponentialWeights` vector with weight values `vs`. -A precomputed sum may be provided as `wsum`. +""" + eweights(t::AbstractVector{<:Integer}, λ::Real) + eweights(t::AbstractVector{T}, r::StepRange{T}, λ::Real) where T + eweights(n::Integer, λ::Real) -Exponential weights are a common form of temporal weights which assign exponentially -decreasing weight to past observations, which in this case corresponds to the front of -the vector. That is, newer observations are assumed to be at the end. -""" ExponentialWeights +Construct [`Weights`](@ref) vector which assigns exponentially decreasing weights to past +observations, which in this case corresponds to larger integer values `i` in `t`. -""" - eweights(n, λ) +For each element `i` in `t` the weight value is computed as: -Construct an [`ExponentialWeights`](@ref) vector with length `n`, -where each element in position ``i`` is set to ``λ (1 - λ)^{1 - i}``. +``λ (1 - λ)^{1 - i}`` ``λ`` is a smoothing factor or rate parameter such that ``0 < λ \\leq 1``. As this value approaches 0, the resulting weights will be almost equal, while values closer to 1 will put greater weight on the tail elements of the vector. # Examples +```julia-repl +julia> eweights(1:10, 0.3) +10-element Weights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.42857142857142855 + 0.6122448979591837 + 0.8746355685131197 + 1.249479383590171 + 1.7849705479859588 + 2.549957925694227 + 3.642797036706039 + 5.203995766722913 + 7.434279666747019 +``` +Simply passing the number of observations `n` is equivalent to passing in `1:n`. ```julia-repl julia> eweights(10, 0.3) -10-element ExponentialWeights{Float64,Float64,Array{Float64,1}}: +10-element Weights{Float64,Float64,Array{Float64,1}}: 0.3 0.42857142857142855 0.6122448979591837 @@ -232,20 +240,42 @@ julia> eweights(10, 0.3) 5.203995766722913 7.434279666747019 ``` + +Finally, passing arbitrary times and a step range is equivalent to passing +`something.(indexin(t, r))`. +```julia-repl +julia> eweights([1, 3, 5], 1:10, 0.3) +3-element Weights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.6122448979591837 + 1.249479383590171 +``` """ +function eweights(t::AbstractVector{T}, λ::Real) where T<:Integer + 0 < λ <= 1 || throw(ArgumentError("Smoothing factor must be between 0 and 1")) + + w0 = map(t) do i + i > 0 || throw(ArgumentError("Time indices must be non-zero positive integers")) + λ * (1 - λ)^(1 - i) + end + + s = sum(w0) + Weights{typeof(s), eltype(w0), typeof(w0)}(w0, s) +end + function eweights(n::Integer, λ::Real) n > 0 || throw(ArgumentError("cannot construct exponential weights of length < 1")) - 0 < λ <= 1 || throw(ArgumentError("smoothing factor must be between 0 and 1")) - w0 = map(i -> λ * (1 - λ)^(1 - i), 1:n) - s = sum(w0) - ExponentialWeights{typeof(s), eltype(w0), typeof(w0)}(w0, s) + eweights(1:n, λ) end +eweights(t::AbstractVector, r::AbstractRange, λ::Real) = + eweights(something.(indexin(t, r)), λ) + # NOTE: No variance correction is implemented for exponential weights ##### Equality tests ##### -for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, ExponentialWeights, Weights) +for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) @eval begin Base.isequal(x::$w, y::$w) = isequal(x.sum, y.sum) && isequal(x.values, y.values) Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) @@ -531,7 +561,7 @@ _mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T,W} = Compute the weighted quantiles of a vector `v` at a specified set of probability values `p`, using weights given by a weight vector `w` (of type `AbstractWeights`). Weights must not be negative. The weights and data vectors must have the same length. -`NaN` is returned if `x` contains any `NaN` values. An error is raised if `w` contains +`NaN` is returned if `x` contains any `NaN` values. An error is raised if `w` contains any `NaN` values. With [`FrequencyWeights`](@ref), the function returns the same result as @@ -552,15 +582,15 @@ function quantile(v::RealVector{V}, w::AbstractWeights{W}, p::RealVector) where all(x -> 0 <= x <= 1, p) || throw(ArgumentError("input probability out of [0,1] range")) w.sum == 0 && throw(ArgumentError("weight vector cannot sum to zero")) - length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," * + length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," * "got $(length(v)) and $(length(w))")) for x in w.values isnan(x) && throw(ArgumentError("weight vector cannot contain NaN entries")) x < 0 && throw(ArgumentError("weight vector cannot contain negative entries")) end - isa(w, FrequencyWeights) && !(eltype(w) <: Integer) && any(!isinteger, w) && - throw(ArgumentError("The values of the vector of `FrequencyWeights` must be numerically" * + isa(w, FrequencyWeights) && !(eltype(w) <: Integer) && any(!isinteger, w) && + throw(ArgumentError("The values of the vector of `FrequencyWeights` must be numerically" * "equal to integers. Use `ProbabilityWeights` or `AnalyticWeights` instead.")) # remove zeros weights and sort diff --git a/test/runtests.jl b/test/runtests.jl index dac21a0c8..500539c74 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using StatsBase +using Dates using LinearAlgebra using Random using Statistics diff --git a/test/weights.jl b/test/weights.jl index ecd07f90d..fab085d0e 100644 --- a/test/weights.jl +++ b/test/weights.jl @@ -449,21 +449,51 @@ end @test round(mean(Union{Int,Missing}[1,2], weights([1,2])), digits=3) ≈ 1.667 end -@testset "ExponentialWeights" begin - @testset "Basic Usage" begin +@testset "Exponential Weights" begin + @testset "Usage" begin θ = 5.25 λ = 1 - exp(-1 / θ) # simple conversion for the more common/readable method - v = [λ*(1-λ)^(1-i) for i = 1:4] - w = ExponentialWeights(v) + w = Weights(v) @test round.(w, digits=4) == [0.1734, 0.2098, 0.2539, 0.3071] - @test eweights(4, λ) ≈ w + + @testset "basic" begin + @test eweights(1:4, λ) ≈ w + end + + @testset "1:n" begin + @test eweights(4, λ) ≈ w + end + + @testset "indexin" begin + v = [λ*(1-λ)^(1-i) for i = 1:10] + + # Test that we should be able to skip indices easily + @test eweights([1, 3, 5, 7], 1:10, λ) ≈ Weights(v[[1, 3, 5, 7]]) + + # This should also work with actual time types + t1 = DateTime(2019, 1, 1, 1) + tx = t1 + Hour(7) + tn = DateTime(2019, 1, 2, 1) + + @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ) ≈ Weights(v[[1, 3, 5, 7]]) + end end @testset "Failure Conditions" begin + # n == 0 @test_throws ArgumentError eweights(0, 0.3) + + # λ > 1.0 @test_throws ArgumentError eweights(1, 1.1) + + # time indices are not all positive non-zero integers + @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3) + + # Passing in an array of bools will work because Bool <: Integer, + # but any `false` values will trigger the same argument error as 0.0 + @test_throws ArgumentError eweights([true, false, true, true], 0.3) end end From bc9cdd0cce5f28ba71f335df6667cd6d969e9d26 Mon Sep 17 00:00:00 2001 From: rofinn Date: Wed, 29 May 2019 11:57:57 -0500 Subject: [PATCH 03/13] Move some of the documentation to the markdown files. --- docs/src/weights.md | 50 ++++++++++++++++++++++++++++++++++++++++++--- src/weights.jl | 26 ----------------------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index 94f02ebad..214610d54 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -55,8 +55,53 @@ w = weights([1., 2., 3.]) Exponential weights are a common form of temporal weights which assign exponentially decreasing weight to past observations. -```julia -w = eweights(4, 0.173) # construction based on length and rate parameter +For each element `i` in `t` the weight value is computed as: + +``λ (1 - λ)^{1 - i}`` + +``λ`` is a smoothing factor or rate parameter such that ``0 < λ \\leq 1``. +As this value approaches 0, the resulting weights will be almost equal, +while values closer to 1 will put greater weight on the tail elements of the vector. + +# Examples +```julia-repl +julia> eweights(1:10, 0.3) +10-element Weights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.42857142857142855 + 0.6122448979591837 + 0.8746355685131197 + 1.249479383590171 + 1.7849705479859588 + 2.549957925694227 + 3.642797036706039 + 5.203995766722913 + 7.434279666747019 +``` + +Simply passing the number of observations `n` is equivalent to passing in `1:n`. +```julia-repl +julia> eweights(10, 0.3) +10-element Weights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.42857142857142855 + 0.6122448979591837 + 0.8746355685131197 + 1.249479383590171 + 1.7849705479859588 + 2.549957925694227 + 3.642797036706039 + 5.203995766722913 + 7.434279666747019 +``` + +Finally, passing arbitrary times and a step range is equivalent to passing `something.(indexin(t, r))`. +```julia-repl +julia> eweights([1, 3, 5], 1:10, 0.3) +3-element Weights{Float64,Float64,Array{Float64,1}}: + 0.3 + 0.6122448979591837 + 1.249479383590171 ``` ## Methods @@ -75,7 +120,6 @@ The following constructors are provided: AnalyticWeights FrequencyWeights ProbabilityWeights -ExponentialWeights Weights aweights fweights diff --git a/src/weights.jl b/src/weights.jl index 06b803651..cea638178 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -224,32 +224,6 @@ julia> eweights(1:10, 0.3) 5.203995766722913 7.434279666747019 ``` - -Simply passing the number of observations `n` is equivalent to passing in `1:n`. -```julia-repl -julia> eweights(10, 0.3) -10-element Weights{Float64,Float64,Array{Float64,1}}: - 0.3 - 0.42857142857142855 - 0.6122448979591837 - 0.8746355685131197 - 1.249479383590171 - 1.7849705479859588 - 2.549957925694227 - 3.642797036706039 - 5.203995766722913 - 7.434279666747019 -``` - -Finally, passing arbitrary times and a step range is equivalent to passing -`something.(indexin(t, r))`. -```julia-repl -julia> eweights([1, 3, 5], 1:10, 0.3) -3-element Weights{Float64,Float64,Array{Float64,1}}: - 0.3 - 0.6122448979591837 - 1.249479383590171 -``` """ function eweights(t::AbstractVector{T}, λ::Real) where T<:Integer 0 < λ <= 1 || throw(ArgumentError("Smoothing factor must be between 0 and 1")) From 22c0f62bfa20864a7058f179b76bd1fd88fd00b1 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Mon, 3 Jun 2019 10:44:39 -0500 Subject: [PATCH 04/13] Update docs/src/weights.md Co-Authored-By: Milan Bouchet-Valat --- docs/src/weights.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index 214610d54..40f641610 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -50,7 +50,7 @@ w = Weights([1., 2., 3.]) w = weights([1., 2., 3.]) ``` -### `eweights` +### Exponential weights: `eweights` Exponential weights are a common form of temporal weights which assign exponentially decreasing weight to past observations. From 6fdf82d347796141fc1af0feecc48aaa85041034 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Mon, 3 Jun 2019 10:46:24 -0500 Subject: [PATCH 05/13] Update docs/src/weights.md Co-Authored-By: Milan Bouchet-Valat --- docs/src/weights.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index 40f641610..8dd85cf17 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -59,7 +59,7 @@ For each element `i` in `t` the weight value is computed as: ``λ (1 - λ)^{1 - i}`` -``λ`` is a smoothing factor or rate parameter such that ``0 < λ \\leq 1``. +``λ`` is a smoothing factor or rate parameter such that ``0 < λ ≤ 1``. As this value approaches 0, the resulting weights will be almost equal, while values closer to 1 will put greater weight on the tail elements of the vector. From 71296bdc13fa454aa2e771a1cc39a35392c3bde8 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Mon, 3 Jun 2019 10:52:13 -0500 Subject: [PATCH 06/13] Update docs/src/weights.md Co-Authored-By: Milan Bouchet-Valat --- docs/src/weights.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index 8dd85cf17..e375148df 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -53,7 +53,7 @@ w = weights([1., 2., 3.]) ### Exponential weights: `eweights` Exponential weights are a common form of temporal weights which assign exponentially decreasing -weight to past observations. +weights to past observations. For each element `i` in `t` the weight value is computed as: From e9e2796b1d79c96f01c732031662794dc33a9a63 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Mon, 3 Jun 2019 10:52:25 -0500 Subject: [PATCH 07/13] Update src/weights.jl Co-Authored-By: Milan Bouchet-Valat --- src/weights.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/weights.jl b/src/weights.jl index cea638178..59e9d84ae 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -205,7 +205,7 @@ For each element `i` in `t` the weight value is computed as: ``λ (1 - λ)^{1 - i}`` -``λ`` is a smoothing factor or rate parameter such that ``0 < λ \\leq 1``. +``λ`` is a smoothing factor or rate parameter such that ``0 < λ ≤ 1``. As this value approaches 0, the resulting weights will be almost equal, while values closer to 1 will put greater weight on the tail elements of the vector. From 8bb948f5d84bea42120a05c683ebd0763cd47dde Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Mon, 3 Jun 2019 11:41:56 -0500 Subject: [PATCH 08/13] Update src/weights.jl Co-Authored-By: Milan Bouchet-Valat --- src/weights.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/weights.jl b/src/weights.jl index 59e9d84ae..c3f0f9504 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -234,7 +234,7 @@ function eweights(t::AbstractVector{T}, λ::Real) where T<:Integer end s = sum(w0) - Weights{typeof(s), eltype(w0), typeof(w0)}(w0, s) + Weights(w0, s) end function eweights(n::Integer, λ::Real) From 96275d8f94270f43f67307eaccab20bdc498c261 Mon Sep 17 00:00:00 2001 From: rofinn Date: Mon, 3 Jun 2019 11:57:36 -0500 Subject: [PATCH 09/13] Minor cleanup --- src/weights.jl | 6 +----- test/weights.jl | 11 ++++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/weights.jl b/src/weights.jl index c3f0f9504..37a949437 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -237,11 +237,7 @@ function eweights(t::AbstractVector{T}, λ::Real) where T<:Integer Weights(w0, s) end -function eweights(n::Integer, λ::Real) - n > 0 || throw(ArgumentError("cannot construct exponential weights of length < 1")) - eweights(1:n, λ) -end - +eweights(n::Integer, λ::Real) = eweights(1:n, λ) eweights(t::AbstractVector, r::AbstractRange, λ::Real) = eweights(something.(indexin(t, r)), λ) diff --git a/test/weights.jl b/test/weights.jl index fab085d0e..ab40cf667 100644 --- a/test/weights.jl +++ b/test/weights.jl @@ -2,8 +2,6 @@ using StatsBase using LinearAlgebra, Random, SparseArrays, Test @testset "StatsBase.Weights" begin -# NOTE: Do not add eweights here, as its methods don't match those of the others, so the -# tests below don't make sense for it weight_funcs = (weights, aweights, fweights, pweights) # Construction @@ -481,10 +479,13 @@ end end end - @testset "Failure Conditions" begin - # n == 0 - @test_throws ArgumentError eweights(0, 0.3) + @testset "Empty" begin + @test eweights(0, 0.3) == Weights(Float64[]) + @test eweights(1:0, 0.3) == Weights(Float64[]) + @test eweights(Int[], 1:10, 0.4) == Weights(Float64[]) + end + @testset "Failure Conditions" begin # λ > 1.0 @test_throws ArgumentError eweights(1, 1.1) From cb0962f897d311e6578cffb8e157832c7ebe2c1c Mon Sep 17 00:00:00 2001 From: rofinn Date: Thu, 20 Jun 2019 16:16:41 -0500 Subject: [PATCH 10/13] More docs cleanup. --- docs/src/weights.md | 18 +++++++++++++++--- src/weights.jl | 11 ++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docs/src/weights.md b/docs/src/weights.md index e375148df..fe2770105 100644 --- a/docs/src/weights.md +++ b/docs/src/weights.md @@ -55,7 +55,7 @@ w = weights([1., 2., 3.]) Exponential weights are a common form of temporal weights which assign exponentially decreasing weights to past observations. -For each element `i` in `t` the weight value is computed as: +If `t` is a vector of temporal indices then for each index `i` we compute the weight as: ``λ (1 - λ)^{1 - i}`` @@ -64,6 +64,7 @@ As this value approaches 0, the resulting weights will be almost equal, while values closer to 1 will put greater weight on the tail elements of the vector. # Examples + ```julia-repl julia> eweights(1:10, 0.3) 10-element Weights{Float64,Float64,Array{Float64,1}}: @@ -80,6 +81,7 @@ julia> eweights(1:10, 0.3) ``` Simply passing the number of observations `n` is equivalent to passing in `1:n`. + ```julia-repl julia> eweights(10, 0.3) 10-element Weights{Float64,Float64,Array{Float64,1}}: @@ -95,15 +97,25 @@ julia> eweights(10, 0.3) 7.434279666747019 ``` -Finally, passing arbitrary times and a step range is equivalent to passing `something.(indexin(t, r))`. +Finally, you can construct exponential weights from an arbitrary subset of timestamps within a larger range. + ```julia-repl -julia> eweights([1, 3, 5], 1:10, 0.3) +julia> t +2019-01-01T01:00:00:2 hours:2019-01-01T05:00:00 + +julia> r +2019-01-01T01:00:00:1 hour:2019-01-02T01:00:00 + +julia> eweights(t, r, 0.3) 3-element Weights{Float64,Float64,Array{Float64,1}}: 0.3 0.6122448979591837 1.249479383590171 ``` +NOTE: This is equivalent to `eweights(something.(indexin(t, r)), 0.3)`, which is saying that for each value in `t` return the corresponding index for that value in `r`. +Since `indexin` returns `nothing` if there is no corresponding value from `t` in `r` we use `something` to eliminate that possibility. + ## Methods `AbstractWeights` implements the following methods: diff --git a/src/weights.jl b/src/weights.jl index 37a949437..63de259b6 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -205,9 +205,14 @@ For each element `i` in `t` the weight value is computed as: ``λ (1 - λ)^{1 - i}`` -``λ`` is a smoothing factor or rate parameter such that ``0 < λ ≤ 1``. -As this value approaches 0, the resulting weights will be almost equal, -while values closer to 1 will put greater weight on the tail elements of the vector. +# Arguments + +- `t::AbstractVector`: temporal indices or timestamps +- `r::StepRange`: a larger range to use when constructing weights from a subset of timestamps +- `n::Integer`: number of temporal indices to assume for the exponential weighting +- `λ::Real`: a smoothing factor or rate parameter such that ``0 < λ ≤ 1``. + As this value approaches 0, the resulting weights will be almost equal, + while values closer to 1 will put greater weight on the tail elements of the vector. # Examples ```julia-repl From b132deb38eaf47772d63535bf31ed999e254344c Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Tue, 25 Jun 2019 10:06:11 -0500 Subject: [PATCH 11/13] Update src/weights.jl Co-Authored-By: Milan Bouchet-Valat --- src/weights.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/weights.jl b/src/weights.jl index 63de259b6..873e6d691 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -198,7 +198,7 @@ end eweights(t::AbstractVector{T}, r::StepRange{T}, λ::Real) where T eweights(n::Integer, λ::Real) -Construct [`Weights`](@ref) vector which assigns exponentially decreasing weights to past +Construct a [`Weights`](@ref) vector which assigns exponentially decreasing weights to past observations, which in this case corresponds to larger integer values `i` in `t`. For each element `i` in `t` the weight value is computed as: From bbc3850a9e581e0cd4940e18a3c8262501e7f846 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Tue, 25 Jun 2019 10:06:29 -0500 Subject: [PATCH 12/13] Update src/weights.jl Co-Authored-By: Milan Bouchet-Valat --- src/weights.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/weights.jl b/src/weights.jl index 873e6d691..bdad988b2 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -200,6 +200,8 @@ end Construct a [`Weights`](@ref) vector which assigns exponentially decreasing weights to past observations, which in this case corresponds to larger integer values `i` in `t`. +If an integer `n` is provided, weights are generated for values from 1 to `n` +(equivalent to `t = 1:n`). For each element `i` in `t` the weight value is computed as: From 02e0c5e410a8c6d9ca4d10944bada5b66965d9f4 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Tue, 25 Jun 2019 10:06:40 -0500 Subject: [PATCH 13/13] Update src/weights.jl Co-Authored-By: Milan Bouchet-Valat --- src/weights.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/weights.jl b/src/weights.jl index bdad988b2..d2548c17f 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -211,7 +211,7 @@ For each element `i` in `t` the weight value is computed as: - `t::AbstractVector`: temporal indices or timestamps - `r::StepRange`: a larger range to use when constructing weights from a subset of timestamps -- `n::Integer`: number of temporal indices to assume for the exponential weighting +- `n::Integer`: if provided instead of `t`, temporal indices are taken to be `1:n` - `λ::Real`: a smoothing factor or rate parameter such that ``0 < λ ≤ 1``. As this value approaches 0, the resulting weights will be almost equal, while values closer to 1 will put greater weight on the tail elements of the vector.