Skip to content

Commit

Permalink
Merge pull request #13348 from stevengj/zvar
Browse files Browse the repository at this point in the history
fix #13309: return correct real value from var, varm, etc of complex arrays
  • Loading branch information
stevengj committed Sep 29, 2015
2 parents 17328be + b26d799 commit 6fa9326
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 25 deletions.
2 changes: 1 addition & 1 deletion base/docs/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6354,7 +6354,7 @@ minimum(A,dims)
doc"""
var(v[, region])
Compute the sample variance of a vector or array `v`, optionally along dimensions in `region`. The algorithm will return an estimator of the generative distribution's variance under the assumption that each entry of `v` is an IID drawn from that generative distribution. This computation is equivalent to calculating `sum((v - mean(v)).^2) / (length(v) - 1)`. Note: Julia does not ignore `NaN` values in the computation. For applications requiring the handling of missing data, the `DataArray` package is recommended.
Compute the sample variance of a vector or array `v`, optionally along dimensions in `region`. The algorithm will return an estimator of the generative distribution's variance under the assumption that each entry of `v` is an IID drawn from that generative distribution. This computation is equivalent to calculating `sumabs2(v - mean(v)) / (length(v) - 1)`. Note: Julia does not ignore `NaN` values in the computation. For applications requiring the handling of missing data, the `DataArray` package is recommended.
"""
var

Expand Down
46 changes: 23 additions & 23 deletions base/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ mean{T}(A::AbstractArray{T}, region) =

##### variances #####

# faster computation of real(conj(x)*y)
realXcY(x::Real, y::Real) = x*y
realXcY(x::Complex, y::Complex) = real(x)*real(y) + imag(x)*imag(y)

function var(iterable; corrected::Bool=true, mean=nothing)
state = start(iterable)
if done(iterable, state)
Expand All @@ -45,12 +49,12 @@ function var(iterable; corrected::Bool=true, mean=nothing)
# Use Welford algorithm as seen in (among other places)
# Knuth's TAOCP, Vol 2, page 232, 3rd edition.
M = value / 1
S = zero(M)
S = real(zero(M))
while !done(iterable, state)
value, state = next(iterable, state)
count += 1
new_M = M + (value - M) / count
S = S + (value - M) * (value - new_M)
S = S + realXcY(value - M, value - new_M)
M = new_M
end
return S / (count - Int(corrected))
Expand All @@ -60,11 +64,11 @@ function var(iterable; corrected::Bool=true, mean=nothing)
# by Chan, Golub, and LeVeque, Technical Report STAN-CS-79-773,
# Department of Computer Science, Stanford University,
# because user can provide mean value that is different to mean(iterable)
sum2 = (value - mean::Number)^2
sum2 = abs2(value - mean::Number)
while !done(iterable, state)
value, state = next(iterable, state)
count += 1
sum2 += (value - mean)^2
sum2 += abs2(value - mean)
end
return sum2 / (count - Int(corrected))
else
Expand All @@ -74,7 +78,7 @@ end

function varzm{T}(A::AbstractArray{T}; corrected::Bool=true)
n = length(A)
n == 0 && return convert(momenttype(T), NaN)
n == 0 && return convert(real(momenttype(T)), NaN)
return sumabs2(A) / (n - Int(corrected))
end

Expand All @@ -89,7 +93,7 @@ function varzm!{S}(R::AbstractArray{S}, A::AbstractArray; corrected::Bool=true)
end

varzm{T}(A::AbstractArray{T}, region; corrected::Bool=true) =
varzm!(reducedim_initarray(A, region, 0, momenttype(T)), A; corrected=corrected)
varzm!(reducedim_initarray(A, region, 0, real(momenttype(T))), A; corrected=corrected)

immutable CentralizedAbs2Fun{T<:Number} <: Func{1}
m::T
Expand Down Expand Up @@ -139,8 +143,8 @@ end

function varm{T}(A::AbstractArray{T}, m::Number; corrected::Bool=true)
n = length(A)
n == 0 && return convert(momenttype(T), NaN)
n == 1 && return convert(momenttype(T), abs2(A[1] - m)/(1 - Int(corrected)))
n == 0 && return convert(real(momenttype(T)), NaN)
n == 1 && return convert(real(momenttype(T)), abs2(A[1] - m)/(1 - Int(corrected)))
return centralize_sumabs2(A, m) / (n - Int(corrected))
end

Expand All @@ -155,14 +159,15 @@ function varm!{S}(R::AbstractArray{S}, A::AbstractArray, m::AbstractArray; corre
end

varm{T}(A::AbstractArray{T}, m::AbstractArray, region; corrected::Bool=true) =
varm!(reducedim_initarray(A, region, 0, momenttype(T)), A, m; corrected=corrected)
varm!(reducedim_initarray(A, region, 0, real(momenttype(T))), A, m; corrected=corrected)


function var{T}(A::AbstractArray{T}; corrected::Bool=true, mean=nothing)
convert(momenttype(T), mean == 0 ? varzm(A; corrected=corrected) :
mean === nothing ? varm(A, Base.mean(A); corrected=corrected) :
isa(mean, Number) ? varm(A, mean::Number; corrected=corrected) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))")))::momenttype(T)
convert(real(momenttype(T)),
mean == 0 ? varzm(A; corrected=corrected) :
mean === nothing ? varm(A, Base.mean(A); corrected=corrected) :
isa(mean, Number) ? varm(A, mean::Number; corrected=corrected) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))")))::real(momenttype(T))
end

function var(A::AbstractArray, region; corrected::Bool=true, mean=nothing)
Expand Down Expand Up @@ -243,7 +248,7 @@ _vmean(x::AbstractMatrix, vardim::Int) = mean(x, vardim)

# core functions

unscaled_covzm(x::AbstractVector) = dot(x, x)
unscaled_covzm(x::AbstractVector) = sumabs2(x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')

unscaled_covzm(x::AbstractVector, y::AbstractVector) = dot(x, y)
Expand All @@ -256,7 +261,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =

# covzm (with centered data)

covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, x) / (length(x) - Int(corrected))
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))

covzm(x::AbstractMatrix; vardim::Int=1, corrected::Bool=true) =
scale!(unscaled_covzm(x, vardim), inv(size(x,vardim) - Int(corrected)))
Expand Down Expand Up @@ -372,7 +377,7 @@ end

# corzm (non-exported, with centered data)

corzm{T}(x::AbstractVector{T}) = float(one(T) * one(T))
corzm{T}(x::AbstractVector{T}) = one(real(T))

corzm(x::AbstractMatrix; vardim::Int=1) =
(c = unscaled_covzm(x, vardim); cov2cor!(c, sqrt!(diag(c))))
Expand Down Expand Up @@ -408,7 +413,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix; vardim::Int=1) =

# corm

corm(x::AbstractVector, xmean) = corzm(x .- xmean)
corm{T}(x::AbstractVector{T}, xmean) = one(real(T))

corm(x::AbstractMatrix, xmean; vardim::Int=1) = corzm(x .- xmean; vardim=vardim)

Expand All @@ -419,12 +424,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean; vardim::Int=1) =

# cor

function cor(x::AbstractVector; mean=nothing)
mean == 0 ? corzm(x) :
mean === nothing ? corm(x, Base.mean(x)) :
isa(mean, Number) ? corm(x, mean) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
cor{T}(x::AbstractVector{T}; mean=nothing) = one(real(T))

function cor(x::AbstractMatrix; vardim::Int=1, mean=nothing)
mean == 0 ? corzm(x; vardim=vardim) :
Expand Down
2 changes: 1 addition & 1 deletion doc/stdlib/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ Statistics

.. Docstring generated from Julia source
Compute the sample variance of a vector or array ``v``\ , optionally along dimensions in ``region``\ . The algorithm will return an estimator of the generative distribution's variance under the assumption that each entry of ``v`` is an IID drawn from that generative distribution. This computation is equivalent to calculating ``sum((v - mean(v)).^2) / (length(v) - 1)``\ . Note: Julia does not ignore ``NaN`` values in the computation. For applications requiring the handling of missing data, the ``DataArray`` package is recommended.
Compute the sample variance of a vector or array ``v``\ , optionally along dimensions in ``region``\ . The algorithm will return an estimator of the generative distribution's variance under the assumption that each entry of ``v`` is an IID drawn from that generative distribution. This computation is equivalent to calculating ``sumabs2(v - mean(v)) / (length(v) - 1)``\ . Note: Julia does not ignore ``NaN`` values in the computation. For applications requiring the handling of missing data, the ``DataArray`` package is recommended.

.. function:: varm(v, m)

Expand Down
17 changes: 17 additions & 0 deletions test/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -310,3 +310,20 @@ end
@test_throws ArgumentError histrange([1, 10], 0)
@test_throws ArgumentError histrange([1, 10], -1)
@test_throws ArgumentError histrange(Float64[],-1)

# variance of complex arrays (#13309)
let z = rand(Complex128, 10)
@test var(z) invoke(var, (Any,), z) cov(z) var(z,1)[1] sumabs2(z - mean(z))/9
@test isa(var(z), Float64)
@test isa(invoke(var, (Any,), z), Float64)
@test isa(cov(z), Float64)
@test isa(var(z,1), Vector{Float64})
@test varm(z, 0.0) invoke(varm, (Any,Float64), z, 0.0) sumabs2(z)/9
@test isa(varm(z, 0.0), Float64)
@test isa(invoke(varm, (Any,Float64), z, 0.0), Float64)
@test cor(z) === 1.0
end
let v = varm([1.0+2.0im], 0; corrected = false)
@test v 5
@test isa(v, Float64)
end

0 comments on commit 6fa9326

Please sign in to comment.