diff --git a/src/linalg.jl b/src/linalg.jl index 170232f..cd2041e 100644 --- a/src/linalg.jl +++ b/src/linalg.jl @@ -1010,27 +1010,3 @@ end chol(A::SparseMatrixCSC) = error("Use cholesky() instead of chol() for sparse matrices.") eigen(A::SparseMatrixCSC) = error("Use IterativeEigensolvers.eigs() instead of eigen() for sparse matrices.") - -function Base.cov(X::SparseMatrixCSC; dims::Int=1, corrected::Bool=true) - vardim = dims - a, b = size(X) - n, p = vardim == 1 ? (a, b) : (b, a) - - # The covariance can be decomposed into two terms - # 1/(n - 1) ∑ (x_i - x̄)*(x_i - x̄)' = 1/(n - 1) (∑ x_i*x_i' - n*x̄*x̄') - # which can be evaluated via a sparse matrix-matrix product - - # Compute ∑ x_i*x_i' = X'X using sparse matrix-matrix product - out = Matrix(Base.unscaled_covzm(X, vardim)) - - # Compute x̄ - x̄ᵀ = mean(X, dims=vardim) - - # Subtract n*x̄*x̄' from X'X - @inbounds for j in 1:p, i in 1:p - out[i,j] -= x̄ᵀ[i] * x̄ᵀ[j]' * n - end - - # scale with the sample size n or the corrected sample size n - 1 - return rmul!(out, inv(n - corrected)) -end diff --git a/src/sparsematrix.jl b/src/sparsematrix.jl index 6986b12..47fe84a 100644 --- a/src/sparsematrix.jl +++ b/src/sparsematrix.jl @@ -3489,67 +3489,6 @@ function hash(A::SparseMatrixCSC{T}, h::UInt) where T hashrun(0, length(A)-lastidx, h) # Hash zeros at end end -## Statistics - -# This is the function that does the reduction underlying var/std -function Base.centralize_sumabs2!(R::AbstractArray{S}, A::SparseMatrixCSC{Tv,Ti}, means::AbstractArray) where {S,Tv,Ti} - lsiz = Base.check_reducedims(R,A) - size(means) == size(R) || error("size of means must match size of R") - isempty(R) || fill!(R, zero(S)) - isempty(A) && return R - - colptr = A.colptr - rowval = A.rowval - nzval = A.nzval - m = size(A, 1) - n = size(A, 2) - - if size(R, 1) == size(R, 2) == 1 - # Reduction along both columns and rows - R[1, 1] = Base.centralize_sumabs2(A, means[1]) - elseif size(R, 1) == 1 - # Reduction along rows - @inbounds for col = 1:n - mu = means[col] - r = convert(S, (m-colptr[col+1]+colptr[col])*abs2(mu)) - @simd for j = colptr[col]:colptr[col+1]-1 - r += abs2(nzval[j] - mu) - end - R[1, col] = r - end - elseif size(R, 2) == 1 - # Reduction along columns - rownz = fill(convert(Ti, n), m) - @inbounds for col = 1:n - @simd for j = colptr[col]:colptr[col+1]-1 - row = rowval[j] - R[row, 1] += abs2(nzval[j] - means[row]) - rownz[row] -= 1 - end - end - for i = 1:m - R[i, 1] += rownz[i]*abs2(means[i]) - end - else - # Reduction along a dimension > 2 - @inbounds for col = 1:n - lastrow = 0 - @simd for j = colptr[col]:colptr[col+1]-1 - row = rowval[j] - for i = lastrow+1:row-1 - R[i, col] = abs2(means[i, col]) - end - R[row, col] = abs2(nzval[j] - means[row, col]) - lastrow = row - end - for i = lastrow+1:m - R[i, col] = abs2(means[i, col]) - end - end - end - return R -end - ## Uniform matrix arithmetic (+)(A::SparseMatrixCSC, J::UniformScaling) = A + sparse(J, size(A)...) diff --git a/test/sparse.jl b/test/sparse.jl index fcface7..4828d73 100644 --- a/test/sparse.jl +++ b/test/sparse.jl @@ -489,7 +489,7 @@ end pA = sparse(rand(3, 7)) for arr in (se33, sA, pA) - for f in (sum, prod, minimum, maximum, var) + for f in (sum, prod, minimum, maximum) farr = Array(arr) @test f(arr) ≈ f(farr) @test f(arr, dims=1) ≈ f(farr, dims=1) @@ -518,9 +518,8 @@ end @test prod(sparse(Int[])) === 1 @test_throws ArgumentError minimum(sparse(Int[])) @test_throws ArgumentError maximum(sparse(Int[])) - @test var(sparse(Int[])) === NaN - for f in (sum, prod, var) + for f in (sum, prod) @test isequal(f(spzeros(0, 1), dims=1), f(Matrix{Int}(I, 0, 1), dims=1)) @test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2)) @test isequal(f(spzeros(0, 1), dims=(1, 2)), f(Matrix{Int}(I, 0, 1), dims=(1, 2))) @@ -2033,63 +2032,6 @@ end @test issymmetric(B) end -# Faster covariance function for sparse matrices -# Prevents densifying the input matrix when subtracting the mean -# Test against dense implementation -# PR https://github.com/JuliaLang/julia/pull/22735 -# Part of this test needed to be hacked due to the treatment -# of Inf in sparse matrix algebra -# https://github.com/JuliaLang/julia/issues/22921 -# The issue will be resolved in -# https://github.com/JuliaLang/julia/issues/22733 -@testset "optimizing sparse $elty covariance" for elty in (Float64, Complex{Float64}) - n = 10 - p = 5 - np2 = div(n*p, 2) - nzvals, x_sparse = guardsrand(1) do - if elty <: Real - nzvals = randn(np2) - else - nzvals = complex.(randn(np2), randn(np2)) - end - nzvals, sparse(rand(1:n, np2), rand(1:p, np2), nzvals, n, p) - end - x_dense = convert(Matrix{elty}, x_sparse) - @testset "Test with no Infs and NaNs, vardim=$vardim, corrected=$corrected" for vardim in (1, 2), - corrected in (true, false) - @test cov(x_sparse, dims=vardim, corrected=corrected) ≈ - cov(x_dense , dims=vardim, corrected=corrected) - end - - @testset "Test with $x11, vardim=$vardim, corrected=$corrected" for x11 in (NaN, Inf), - vardim in (1, 2), - corrected in (true, false) - x_sparse[1,1] = x11 - x_dense[1 ,1] = x11 - - cov_sparse = cov(x_sparse, dims=vardim, corrected=corrected) - cov_dense = cov(x_dense , dims=vardim, corrected=corrected) - @test cov_sparse[2:end, 2:end] ≈ cov_dense[2:end, 2:end] - @test isfinite.(cov_sparse) == isfinite.(cov_dense) - @test isfinite.(cov_sparse) == isfinite.(cov_dense) - end - - @testset "Test with NaN and Inf, vardim=$vardim, corrected=$corrected" for vardim in (1, 2), - corrected in (true, false) - x_sparse[1,1] = Inf - x_dense[1 ,1] = Inf - x_sparse[2,1] = NaN - x_dense[2 ,1] = NaN - - cov_sparse = cov(x_sparse, dims=vardim, corrected=corrected) - cov_dense = cov(x_dense , dims=vardim, corrected=corrected) - @test cov_sparse[(1 + vardim):end, (1 + vardim):end] ≈ - cov_dense[ (1 + vardim):end, (1 + vardim):end] - @test isfinite.(cov_sparse) == isfinite.(cov_dense) - @test isfinite.(cov_sparse) == isfinite.(cov_dense) - end -end - @testset "similar should not alias the input sparse array" begin a = sparse(rand(3,3) .+ 0.1) b = similar(a, Float32, Int32)