From 38efcf5dffa2e9380f26a90ce8c9ff121146d13a Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 21 Nov 2021 16:35:19 +0100 Subject: [PATCH 1/2] Deprecate `varm` and `stdm` `varm` and `stdm` are redundant with `var(..., mean=m)` and `std(..., mean=m)`. Moreover, their status is not clear: they are technically exported by Statistics (but not by StatsBase itself), but their docstrings are not included in the manual. They were only tested indirectly, so add tests to ensure deprecations work. The status of `varm!` is even weirder as it's imported from Statistics, but Statistics does not export it, and it has no docstrings. Keep a deprecation just in case. --- src/cov.jl | 2 +- src/deprecates.jl | 7 ++++ src/moments.jl | 99 +++++++++-------------------------------------- test/moments.jl | 12 ++++++ 4 files changed, 39 insertions(+), 81 deletions(-) diff --git a/src/cov.jl b/src/cov.jl index b4b472f83..1c18da2bf 100644 --- a/src/cov.jl +++ b/src/cov.jl @@ -100,7 +100,7 @@ cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::DepBool=nothing) function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1) c = covm(x, mean, w, vardim; corrected=false) - s = stdm(x, w, mean, vardim; corrected=false) + s = std(x, w, vardim; mean=mean, corrected=false) cov2cor!(c, s) end diff --git a/src/deprecates.jl b/src/deprecates.jl index 9e409b33a..9d0cc0e6e 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -39,3 +39,10 @@ end ### Deprecated September 2019 @deprecate sum(A::AbstractArray, w::AbstractWeights, dims::Int) sum(A, w, dims=dims) @deprecate values(wv::AbstractWeights) convert(Vector, wv) + +### Deprecated November 2021 +@deprecate stdm(x::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) std(x, w, mean=m, corrected=corrected) false +@deprecate varm(x::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) var(x, w, mean=m, corrected=corrected) false +@deprecate stdm(x::RealArray, w::AbstractWeights, m::RealArray, dim::Int; corrected::DepBool=nothing) std(x, w, dim, mean=m, corrected=corrected) false +@deprecate varm(x::RealArray, w::AbstractWeights, m::RealArray, dim::Int; corrected::DepBool=nothing) var(x, w, dim, mean=m, corrected=corrected) false +@deprecate varm!(R::AbstractArray, x::RealArray, w::AbstractWeights, m::RealArray, dim::Int; corrected::DepBool=nothing) var!(R, x, w, dim, mean=m, corrected=corrected) false \ No newline at end of file diff --git a/src/moments.jl b/src/moments.jl index a527a0501..5b0a66147 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -1,26 +1,6 @@ ##### Weighted var & std ## var -""" - varm(x::AbstractArray, w::AbstractWeights, m, [dim]; corrected=false) - -Compute the variance of a real-valued array `x` with a known mean `m`, optionally -over a dimension `dim`. Observations in `x` are weighted using weight vector `w`. -The uncorrected (when `corrected=false`) sample variance is defined as: -```math -\\frac{1}{\\sum{w}} \\sum_{i=1}^n {w_i\\left({x_i - m}\\right)^2 } -``` -where ``n`` is the length of the input. The unbiased estimate (when `corrected=true`) of -the population variance is computed by replacing -``\\frac{1}{\\sum{w}}`` with a factor dependent on the type of weights used: -* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}`` -* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}`` -* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)` -* `Weights`: `ArgumentError` (bias correction not supported) -""" -varm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) = - _moment2(v, w, m; corrected=depcheck(:varm, :corrected, corrected)) - """ var(x::AbstractArray, w::AbstractWeights, [dim]; mean=nothing, corrected=false) @@ -43,30 +23,22 @@ function var(v::RealArray, w::AbstractWeights; mean=nothing, corrected = depcheck(:var, :corrected, corrected) if mean == nothing - varm(v, w, Statistics.mean(v, w); corrected=corrected) + _moment2(v, w, Statistics.mean(v, w); corrected=corrected) else - varm(v, w, mean; corrected=corrected) + _moment2(v, w, mean; corrected=corrected) end end ## var along dim -function varm!(R::AbstractArray, A::RealArray, w::AbstractWeights, M::RealArray, - dim::Int; corrected::DepBool=nothing) - corrected = depcheck(:varm!, :corrected, corrected) - rmul!(_wsum_centralize!(R, abs2, A, convert(Vector, w), M, dim, true), - varcorrection(w, corrected)) -end - function var!(R::AbstractArray, A::RealArray, w::AbstractWeights, dims::Int; mean=nothing, corrected::DepBool=nothing) corrected = depcheck(:var!, :corrected, corrected) if mean == 0 - varm!(R, A, w, Base.reducedim_initarray(A, dims, 0, eltype(R)), dims; - corrected=corrected) - elseif mean == nothing - varm!(R, A, w, Statistics.mean(A, w, dims=dims), dims; corrected=corrected) + mean = Base.reducedim_initarray(A, dims, 0, eltype(R)) + elseif mean === nothing + mean = Statistics.mean(A, w, dims=dims) else # check size of mean for i = 1:ndims(A) @@ -78,15 +50,9 @@ function var!(R::AbstractArray, A::RealArray, w::AbstractWeights, dims::Int; dM == dA || throw(DimensionMismatch("Incorrect size of mean.")) end end - varm!(R, A, w, mean, dims; corrected=corrected) end -end - -function varm(A::RealArray, w::AbstractWeights, M::RealArray, dim::Int; - corrected::DepBool=nothing) - corrected = depcheck(:varm, :corrected, corrected) - varm!(similar(A, Float64, Base.reduced_indices(axes(A), dim)), A, w, M, - dim; corrected=corrected) + return rmul!(_wsum_centralize!(R, abs2, A, convert(Vector, w), mean, dims, true), + varcorrection(w, corrected)) end function var(A::RealArray, w::AbstractWeights, dim::Int; mean=nothing, @@ -97,26 +63,6 @@ function var(A::RealArray, w::AbstractWeights, dim::Int; mean=nothing, end ## std -""" - stdm(x::AbstractArray, w::AbstractWeights, m, [dim]; corrected=false) - -Compute the standard deviation of a real-valued array `x` with a known mean `m`, -optionally over a dimension `dim`. Observations in `x` are weighted using weight vector `w`. -The uncorrected (when `corrected=false`) sample standard deviation is defined as: -```math -\\sqrt{\\frac{1}{\\sum{w}} \\sum_{i=1}^n {w_i\\left({x_i - m}\\right)^2 }} -``` -where ``n`` is the length of the input. The unbiased estimate (when `corrected=true`) of the -population standard deviation is computed by replacing ``\\frac{1}{\\sum{w}}`` with a factor -dependent on the type of weights used: -* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}`` -* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}`` -* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)` -* `Weights`: `ArgumentError` (bias correction not supported) -""" -stdm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) = - sqrt(varm(v, w, m, corrected=depcheck(:stdm, :corrected, corrected))) - """ std(x::AbstractArray, w::AbstractWeights, [dim]; mean=nothing, corrected=false) @@ -138,15 +84,8 @@ weights used: std(v::RealArray, w::AbstractWeights; mean=nothing, corrected::DepBool=nothing) = sqrt.(var(v, w; mean=mean, corrected=depcheck(:std, :corrected, corrected))) -stdm(v::RealArray, m::RealArray, dim::Int; corrected::DepBool=nothing) = - sqrt!(varm(v, m, dims=dim, corrected=depcheck(:stdm, :corrected, corrected))) - -stdm(v::RealArray, w::AbstractWeights, m::RealArray, dim::Int; - corrected::DepBool=nothing) = - sqrt.(varm(v, w, m, dim; corrected=depcheck(:stdm, :corrected, corrected))) - -std(v::RealArray, w::AbstractWeights, dim::Int; mean=nothing, - corrected::DepBool=nothing) = +std(v::RealArray, w::AbstractWeights, dim::Int; + mean=nothing, corrected::DepBool=nothing) = sqrt.(var(v, w, dim; mean=mean, corrected=depcheck(:std, :corrected, corrected))) ##### Fused statistics @@ -161,7 +100,7 @@ See [`var`](@ref) documentation for more details. """ function mean_and_var(x; corrected::Bool=true) m = mean(x) - v = varm(x, m; corrected=corrected) + v = var(x, mean=m, corrected=corrected) m, v end @@ -177,30 +116,30 @@ See [`std`](@ref) documentation for more details. """ function mean_and_std(x; corrected::Bool=true) m = mean(x) - s = stdm(x, m; corrected=corrected) + s = std(x, mean=m, corrected=corrected) m, s end function mean_and_var(x::RealArray, w::AbstractWeights; corrected::DepBool=nothing) m = mean(x, w) - v = varm(x, w, m; corrected=depcheck(:mean_and_var, :corrected, corrected)) + v = var(x, w, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) m, v end function mean_and_std(x::RealArray, w::AbstractWeights; corrected::DepBool=nothing) m = mean(x, w) - s = stdm(x, w, m; corrected=depcheck(:mean_and_std, :corrected, corrected)) + s = std(x, w, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) m, s end function mean_and_var(x::RealArray, dim::Int; corrected::Bool=true) - m = mean(x, dims = dim) - v = varm(x, m, dims = dim, corrected=corrected) + m = mean(x, dims=dim) + v = var(x, dims=dim, mean=m, corrected=corrected) m, v end function mean_and_std(x::RealArray, dim::Int; corrected::Bool=true) - m = mean(x, dims = dim) - s = stdm(x, m, dim; corrected=corrected) + m = mean(x, dims=dim) + s = std(x, dims=dim, mean=m, corrected=corrected) m, s end @@ -208,13 +147,13 @@ end function mean_and_var(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) - v = varm(x, w, m, dims; corrected=depcheck(:mean_and_var, :corrected, corrected)) + v = var(x, w, dims, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) m, v end function mean_and_std(x::RealArray, w::AbstractWeights, dims::Int; corrected::DepBool=nothing) m = mean(x, w, dims=dims) - s = stdm(x, w, m, dims; corrected=depcheck(:mean_and_std, :corrected, corrected)) + s = std(x, w, dims, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) m, s end diff --git a/test/moments.jl b/test/moments.jl index 97fda44ac..09c77e487 100644 --- a/test/moments.jl +++ b/test/moments.jl @@ -20,11 +20,13 @@ w = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] @testset "Variance" begin @test var(x, wv; corrected=false) ≈ expected_var @test var(x, wv; mean=m, corrected=false) ≈ expected_var + @test varm(x, wv, m; corrected=false) ≈ expected_var end @testset "Standard Deviation" begin @test std(x, wv; corrected=false) ≈ expected_std @test std(x, wv; mean=m, corrected=false) ≈ expected_std + @test stdm(x, wv, m; corrected=false) ≈ expected_std end @testset "Mean and Variance" begin @@ -62,6 +64,7 @@ expected_std = sqrt.(expected_var) else @test var(x, wv; corrected=true) ≈ expected_var[i] @test var(x, wv; mean=m, corrected=true) ≈ expected_var[i] + @test varm(x, wv, m; corrected=true) ≈ expected_var[i] end end @@ -71,6 +74,7 @@ expected_std = sqrt.(expected_var) else @test std(x, wv; corrected=true) ≈ expected_std[i] @test std(x, wv; mean=m, corrected=true) ≈ expected_std[i] + @test stdm(x, wv, m; corrected=true) ≈ expected_std[i] end end @@ -123,6 +127,8 @@ w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] @test var(x, wv2, 2; corrected=false) ≈ expected_var2 @test var(x, wv1, 1; mean=m1, corrected=false) ≈ expected_var1 @test var(x, wv2, 2; mean=m2, corrected=false) ≈ expected_var2 + @test varm(x, wv1, m1, 1; corrected=false) ≈ expected_var1 + @test varm(x, wv2, m2, 2; corrected=false) ≈ expected_var2 end @testset "Standard Deviation" begin @@ -130,6 +136,8 @@ w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] @test std(x, wv2, 2; corrected=false) ≈ expected_std2 @test std(x, wv1, 1; mean=m1, corrected=false) ≈ expected_std1 @test std(x, wv2, 2; mean=m2, corrected=false) ≈ expected_std2 + @test stdm(x, wv1, m1, 1; corrected=false) ≈ expected_std1 + @test stdm(x, wv2, m2, 2; corrected=false) ≈ expected_std2 end @testset "Mean and Variance" begin @@ -186,6 +194,8 @@ end @test var(x, wv2, 2; corrected=true) ≈ expected_var2 @test var(x, wv1, 1; mean=m1, corrected=true) ≈ expected_var1 @test var(x, wv2, 2; mean=m2, corrected=true) ≈ expected_var2 + @test varm(x, wv1, m1, 1; corrected=true) ≈ expected_var1 + @test varm(x, wv2, m2, 2; corrected=true) ≈ expected_var2 end end @@ -197,6 +207,8 @@ end @test std(x, wv2, 2; corrected=true) ≈ expected_std2 @test std(x, wv1, 1; mean=m1, corrected=true) ≈ expected_std1 @test std(x, wv2, 2; mean=m2, corrected=true) ≈ expected_std2 + @test stdm(x, wv1, m1, 1; corrected=true) ≈ expected_std1 + @test stdm(x, wv2, m2, 2; corrected=true) ≈ expected_std2 end end From bad23b4ab97458390c4991adf553d3c656e50fab Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Fri, 26 Nov 2021 08:36:55 +0100 Subject: [PATCH 2/2] Adjust alignment in test/moments.jl --- test/moments.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/moments.jl b/test/moments.jl index 09c77e487..7378c4a10 100644 --- a/test/moments.jl +++ b/test/moments.jl @@ -20,13 +20,13 @@ w = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] @testset "Variance" begin @test var(x, wv; corrected=false) ≈ expected_var @test var(x, wv; mean=m, corrected=false) ≈ expected_var - @test varm(x, wv, m; corrected=false) ≈ expected_var + @test varm(x, wv, m; corrected=false) ≈ expected_var end @testset "Standard Deviation" begin @test std(x, wv; corrected=false) ≈ expected_std @test std(x, wv; mean=m, corrected=false) ≈ expected_std - @test stdm(x, wv, m; corrected=false) ≈ expected_std + @test stdm(x, wv, m; corrected=false) ≈ expected_std end @testset "Mean and Variance" begin @@ -64,7 +64,7 @@ expected_std = sqrt.(expected_var) else @test var(x, wv; corrected=true) ≈ expected_var[i] @test var(x, wv; mean=m, corrected=true) ≈ expected_var[i] - @test varm(x, wv, m; corrected=true) ≈ expected_var[i] + @test varm(x, wv, m; corrected=true) ≈ expected_var[i] end end @@ -74,7 +74,7 @@ expected_std = sqrt.(expected_var) else @test std(x, wv; corrected=true) ≈ expected_std[i] @test std(x, wv; mean=m, corrected=true) ≈ expected_std[i] - @test stdm(x, wv, m; corrected=true) ≈ expected_std[i] + @test stdm(x, wv, m; corrected=true) ≈ expected_std[i] end end @@ -123,12 +123,12 @@ w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] expected_std2 = sqrt.(expected_var2) @testset "Variance" begin - @test var(x, wv1, 1; corrected=false) ≈ expected_var1 - @test var(x, wv2, 2; corrected=false) ≈ expected_var2 + @test var(x, wv1, 1; corrected=false) ≈ expected_var1 + @test var(x, wv2, 2; corrected=false) ≈ expected_var2 @test var(x, wv1, 1; mean=m1, corrected=false) ≈ expected_var1 @test var(x, wv2, 2; mean=m2, corrected=false) ≈ expected_var2 - @test varm(x, wv1, m1, 1; corrected=false) ≈ expected_var1 - @test varm(x, wv2, m2, 2; corrected=false) ≈ expected_var2 + @test varm(x, wv1, m1, 1; corrected=false) ≈ expected_var1 + @test varm(x, wv2, m2, 2; corrected=false) ≈ expected_var2 end @testset "Standard Deviation" begin @@ -136,8 +136,8 @@ w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] @test std(x, wv2, 2; corrected=false) ≈ expected_std2 @test std(x, wv1, 1; mean=m1, corrected=false) ≈ expected_std1 @test std(x, wv2, 2; mean=m2, corrected=false) ≈ expected_std2 - @test stdm(x, wv1, m1, 1; corrected=false) ≈ expected_std1 - @test stdm(x, wv2, m2, 2; corrected=false) ≈ expected_std2 + @test stdm(x, wv1, m1, 1; corrected=false) ≈ expected_std1 + @test stdm(x, wv2, m2, 2; corrected=false) ≈ expected_std2 end @testset "Mean and Variance" begin @@ -190,12 +190,12 @@ end if isa(wv1, Weights) @test_throws ArgumentError var(x, wv1, 1; corrected=true) else - @test var(x, wv1, 1; corrected=true) ≈ expected_var1 - @test var(x, wv2, 2; corrected=true) ≈ expected_var2 + @test var(x, wv1, 1; corrected=true) ≈ expected_var1 + @test var(x, wv2, 2; corrected=true) ≈ expected_var2 @test var(x, wv1, 1; mean=m1, corrected=true) ≈ expected_var1 @test var(x, wv2, 2; mean=m2, corrected=true) ≈ expected_var2 - @test varm(x, wv1, m1, 1; corrected=true) ≈ expected_var1 - @test varm(x, wv2, m2, 2; corrected=true) ≈ expected_var2 + @test varm(x, wv1, m1, 1; corrected=true) ≈ expected_var1 + @test varm(x, wv2, m2, 2; corrected=true) ≈ expected_var2 end end @@ -207,8 +207,8 @@ end @test std(x, wv2, 2; corrected=true) ≈ expected_std2 @test std(x, wv1, 1; mean=m1, corrected=true) ≈ expected_std1 @test std(x, wv2, 2; mean=m2, corrected=true) ≈ expected_std2 - @test stdm(x, wv1, m1, 1; corrected=true) ≈ expected_std1 - @test stdm(x, wv2, m2, 2; corrected=true) ≈ expected_std2 + @test stdm(x, wv1, m1, 1; corrected=true) ≈ expected_std1 + @test stdm(x, wv2, m2, 2; corrected=true) ≈ expected_std2 end end