Make covariance and correlation work for any iterators #30

Closed
wants to merge 11 commits
1 change: 0 additions & 1 deletion Project.toml
@@ -1,5 +1,4 @@
name = "Statistics"
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
117 changes: 112 additions & 5 deletions src/Statistics.jl
@@ -479,13 +479,18 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)

_abs2(x::Number) = abs2(x)
_abs2(x) = x*x'

_conjmul(x::Number, y::Number) = x * conj(y)
Member: Is this still needed?

_conjmul(x, y) = x * _conj(y)'

# core functions

unscaled_covzm(x::AbstractVector{<:Number}) = sum(abs2, x)
unscaled_covzm(x::AbstractVector) = sum(t -> t*t', x)
unscaled_covzm(x::AbstractVector) = sum(_abs2, x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')

unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(conj(y[i])*x[i] for i in eachindex(y, x))
unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(_conjmul(x[i], y[i]) for i in eachindex(y, x))
unscaled_covzm(x::AbstractVector, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(transpose(x), transpose(_conj(y))))
unscaled_covzm(x::AbstractMatrix, y::AbstractVector, vardim::Int) =
@@ -494,7 +499,25 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))

# covzm (with centered data)

function covzm(itr::Any; corrected::Bool=true)
y = iterate(itr)
if y === nothing
v = _abs2(zero(eltype(itr)))
return (v + v) / 0
end
count = 1
value, state = y
f_value = _abs2(value)
total = Base.reduce_first(+, f_value)
y = iterate(itr, state)
while y !== nothing
value, state = y
total += _abs2(value)
count += 1
y = iterate(itr, state)
end
return total / (count - Int(corrected))
end
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -504,6 +527,26 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
function covzm(x::Any, y::Any; corrected::Bool=true)
Member: Maybe this could just call covm(x, 0, y, 0)? Check whether the compiler is able to optimize for 0 statically.

Member: Have you tried this?

Author: Just did.

julia> x = randn(10_000); y = randn(10_000);

julia> x = x .- Statistics.mean(x); y = y .- Statistics.mean(y);

julia> gx = (xi for xi in x); gy = (yi for yi in y);

julia> @btime Statistics.covzm($gx, $gy)
  11.820 μs (0 allocations: 0 bytes)
0.009480946018135657

julia> @btime Statistics.covm($gx, 0,  $gy, 0)
  14.768 μs (0 allocations: 0 bytes)
0.009480946018135657

Let me know what you think of that difference. It doesn't seem very important to me. The covzm version takes 80% as long as covm with 0s.

z = zip(x, y)
z_itr = iterate(z)
if z_itr === nothing
v = _conjmul(zero(eltype(x)), zero(eltype(y)))
return (v + v) / 0
end
count = 1
(xi, yi), state = z_itr
f_value = _conjmul(xi, yi)
total = Base.reduce_first(+, f_value)
z_itr = iterate(z, state)
while z_itr !== nothing
(xi, yi), state = z_itr
total += _conjmul(xi, yi)
count += 1
z_itr = iterate(z, state)
end
return total / (count - Int(corrected))
end
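
For reference, the delegation suggested in the review thread above would presumably be the one-liner below (a sketch, not part of this diff); the benchmark in the thread indicates it is somewhat slower than the explicit loop:

covzm(x::Any, y::Any; corrected::Bool=true) = covm(x, 0, y, 0; corrected=corrected)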
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -518,20 +561,71 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
function covm(itr::Any, itrmean; corrected::Bool=true)
y = iterate(itr)
if y === nothing
v = _abs2(zero(eltype(itr)) - itrmean)
return (v + v) / 0
end
count = 1
itri, state = y
Member: Why not use value as elsewhere?

first_value = _abs2(itri - itrmean)
total = Base.reduce_first(+, first_value)
y = iterate(itr, state)
while y !== nothing
itri, state = y
total += _abs2(itri - itrmean)
count += 1
y = iterate(itr, state)
end
return total / (count - Int(corrected))
end
covm(x::AbstractVector, xmean; corrected::Bool=true) =
Member: This method is identical to the previous one so it's no longer needed.

covzm(map(t -> t - xmean, x); corrected=corrected)
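
To see why the centering above uses map rather than broadcasting, consider a vector of vectors: map subtracts the mean vector from every element, which is what the variance-covariance computation needs. A hypothetical REPL check with made-up data:

julia> x = [[1.0, 2.0], [3.0, 4.0]]; xmean = Statistics.mean(x);

julia> map(t -> t - xmean, x) == [[-1.0, -1.0], [1.0, 1.0]]
true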
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
z = zip(x, y)
z_itr = iterate(z)
if z_itr === nothing
v = _conjmul(zero(eltype(x)), zero(eltype(y)))
return (v + v) / 0
end
count = 1
(xi, yi), state = z_itr
first_value = _conjmul(xi-xmean, yi-ymean)
total = Base.reduce_first(+, first_value)
z_itr = iterate(z, state)
while z_itr !== nothing
(xi, yi), state = z_itr
total += _conjmul(xi-xmean, yi-ymean)
count += 1
z_itr = iterate(z, state)
end
return total / (count - Int(corrected))
end
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, vardim; corrected=corrected)

# cov (API)
"""
cov(x::Any; corrected::Bool=true)
Member: Better adapt the existing docstring (and method) to only mention iterators, since vectors are just a special case. Same for others.

Compute the variance of the iterator `x`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n`
is the number of elements in the iterator, which need not be known in advance.
"""
function cov(x::Any; corrected::Bool=true)
covm(x, mean(x); corrected=corrected)
end
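
As a hypothetical illustration (made-up data, easy to verify by hand: the corrected variance of 1, 2, 3 is 1.0), a generator should give the same result as the corresponding vector:

julia> cov(x for x in (1, 2, 3))
1.0

julia> cov([1, 2, 3])
1.0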

"""
cov(x::AbstractVector; corrected::Bool=true)

Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum
Compute the variance of the vector `x`. If `x` is a vector of vectors, returns the estimated
variance-covariance matrix of elements in `x`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`.
"""
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected)
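
For the vector-of-vectors case, a hypothetical sanity check with made-up data: the result should agree with the matrix method when each inner vector is a row of the matrix:

julia> cov([[1.0, 2.0], [3.0, 4.0]]) == cov([1.0 2.0; 3.0 4.0])
true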
@@ -546,6 +640,19 @@ if `corrected` is `false` where `n = size(X, dims)`.
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)

"""
cov(x::Any, y::Any; corrected::Bool=true)

Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
``*`` denotes the complex conjugate and `n` is the number of elements in `x` which must equal
the number of elements in `y`. If `x` and `y` are both vectors of vectors, computes the analogous
estimator of the covariance matrix between the elements `xi` and `yi`. If `corrected` is `false`, computes
``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
cov(x::Any, y::Any; corrected::Bool=true) =
covm(x, mean(x), y, mean(y); corrected=corrected)
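
A hypothetical two-iterator example with made-up data (by hand, the corrected covariance of 1, 2, 3 with 2, 4, 6 is 4/2 = 2.0):

julia> cov((xi for xi in 1:3), (yi for yi in 2:2:6))
2.0

julia> cov([1, 2, 3], [2, 4, 6])
2.0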

"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)

26 changes: 20 additions & 6 deletions test/runtests.jl
@@ -314,6 +314,10 @@ Y = [6.0 2.0;
5.0 8.0;
3.0 4.0;
2.0 3.0]
X_vec = [[X[i,1], X[i,2]] for i in 1:size(X, 1)]
X_gen = ([X[i,1], X[i,2]] for i in 1:size(X, 1))
Y_vec = [[Y[i,1], Y[i,2]] for i in 1:size(Y, 1)]
Y_gen = ([Y[i,1], Y[i,2]] for i in 1:size(Y, 1))

@testset "covariance" begin
for vd in [1, 2], zm in [true, false], cr in [true, false]
@@ -328,6 +332,8 @@ Y = [6.0 2.0;
end
x1 = vec(X[:,1])
y1 = vec(Y[:,1])
x1_gen = (x for x in x1)
y1_gen = (y for y in y1)
else
k = size(X, 1)
Cxx = zeros(k, k)
@@ -338,6 +344,8 @@ Y = [6.0 2.0;
end
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
x1_gen = (x for x in x1)
y1_gen = (y for y in y1)
end

c = zm ? Statistics.covm(x1, 0, corrected=cr) :
@@ -346,14 +354,14 @@ Y = [6.0 2.0;
@test c ≈ Cxx[1,1]
@inferred cov(x1, corrected=cr)

@test cov(X) == Statistics.covm(X, mean(X, dims=1))
@test cov(X) == cov(X_vec) == cov(X_gen) == Statistics.covm(X, mean(X, dims=1))
C = zm ? Statistics.covm(X, 0, vd, corrected=cr) :
cov(X, dims=vd, corrected=cr)
@test size(C) == (k, k)
@test C ≈ Cxx
@inferred cov(X, dims=vd, corrected=cr)

@test cov(x1, y1) == Statistics.covm(x1, mean(x1), y1, mean(y1))
@test cov(x1, y1) == cov(x1_gen, y1_gen) == Statistics.covm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.covm(x1, 0, y1, 0, corrected=cr) :
cov(x1, y1, corrected=cr)
@test isa(c, Float64)
@@ -378,7 +386,10 @@ Y = [6.0 2.0;
@test vec(C) ≈ Cxy[:,1]
@inferred cov(X, y1, dims=vd, corrected=cr)

@test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
# Separate tests for equality and approximation
C = cov(X, Y)
@test C == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
@test C ≈ cov(X_vec, Y_vec) ≈ cov(X_gen, Y_gen)
C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) :
cov(X, Y, dims=vd, corrected=cr)
@test size(C) == (k, k)
@@ -644,12 +655,15 @@ end
@testset "cov and cor of complex arrays (issue #21093)" begin
x = [2.7 - 3.3im, 0.9 + 5.4im, 0.1 + 0.2im, -1.7 - 5.8im, 1.1 + 1.9im]
y = [-1.7 - 1.6im, -0.2 + 6.5im, 0.8 - 10.0im, 9.1 - 3.4im, 2.7 - 5.5im]
@test cov(x, y) ≈ 4.8365 - 12.119im
@test cov(y, x) ≈ 4.8365 + 12.119im
x_gen = (i for i in x)
y_gen = (i for i in y)
xy_vec = [[x[i], y[i]] for i in 1:length(x)]
@test cov(x, y) ≈ cov(x_gen, y_gen) ≈ 4.8365 - 12.119im
@test cov(y, x) ≈ cov(y_gen, x_gen) ≈ 4.8365 + 12.119im
@test cov(x, reshape(y, :, 1)) ≈ reshape([4.8365 - 12.119im], 1, 1)
@test cov(reshape(x, :, 1), y) ≈ reshape([4.8365 - 12.119im], 1, 1)
@test cov(reshape(x, :, 1), reshape(y, :, 1)) ≈ reshape([4.8365 - 12.119im], 1, 1)
@test cov([x y]) ≈ [21.779 4.8365-12.119im;
@test cov([x y]) ≈ cov(xy_vec) ≈ cov((i for i in xy_vec)) ≈ [21.779 4.8365-12.119im;
4.8365+12.119im 54.548]
@test cor(x, y) ≈ 0.14032104449218274 - 0.35160772008699703im
@test cor(y, x) ≈ 0.14032104449218274 + 0.35160772008699703im