Skip to content

Commit

Permalink
Merge branch 'master' into mean_may_return_incorrect_results
Browse files Browse the repository at this point in the history
  • Loading branch information
josemanuel22 authored May 28, 2024
2 parents 9e96507 + 9addbb8 commit db3682b
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 170 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ jobs:
fail-fast: false
matrix:
version:
- '1' # automatically expands to the latest stable 1.x release of Julia
- '1' # automatically expands to the latest stable 1.x release of Julia (currently 1.9.x)
- '~1.10.0-0'
- 'nightly'
os:
- ubuntu-latest
Expand Down Expand Up @@ -58,6 +59,7 @@ jobs:
- run: |
julia --project=docs -e '
using Pkg
Pkg.respect_sysimage_versions(false)
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()'
- run: julia --project=docs docs/make.jl
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/tagbot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: TagBot
on:
issue_comment:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: 3
permissions:
actions: read
checks: read
contents: write
deployments: read
issues: read
discussions: read
packages: read
pages: read
pull-requests: read
repository-projects: read
security-events: read
statuses: read
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
17 changes: 16 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
name = "Statistics"
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
keywords = ["statistics"]
license = "MIT"
desc = "Basic statistics for Julia."
version = "1.11.1"

[compat]
julia = "1.9.4"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[weakdeps]
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[extensions]
SparseArraysExt = ["SparseArrays"]

[extras]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Random", "Test"]
test = ["Dates", "Random", "SparseArrays", "Test"]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Statistics.jl

[![Build status](https://github.com/JuliaLang/Statistics.jl/workflows/CI/badge.svg)](https://github.com/JuliaLang/Statistics.jl/actions?query=workflow%3ACI+branch%3Amaster)
[![Build status](https://github.com/JuliaStats/Statistics.jl/workflows/CI/badge.svg)](https://github.com/JuliaStats/Statistics.jl/actions?query=workflow%3ACI+branch%3Amaster)

Development repository for the Statistics standard library (stdlib) that ships with Julia.

Expand Down
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ makedocs(
]
)

deploydocs(repo = "github.com/JuliaLang/Statistics.jl.git")
deploydocs(repo = "github.com/JuliaStats/Statistics.jl.git")
101 changes: 101 additions & 0 deletions ext/SparseArraysExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
module SparseArraysExt

##### SparseArrays optimizations #####

using Base: require_one_based_indexing
using LinearAlgebra
using SparseArrays
using Statistics
using Statistics: centralize_sumabs2, unscaled_covzm

# extended functions
import Statistics: cov, centralize_sumabs2!

# Covariance matrix of the sparse matrix `X`, computed without densifying `X`.
#
# Keywords:
#   dims      - dimension along which the observations are laid out
#               (`1`: rows are observations, otherwise columns are observations).
#   corrected - if `true` scale by `1/(n - 1)`, otherwise by `1/n`, where `n` is the
#               number of observations.
#
# Returns a dense `p × p` matrix, where `p` is the number of variables. The element
# type is the promotion of the element types of the Gram matrix and of the mean, so
# integer-valued input yields a floating-point result instead of failing with an
# `InexactError` when the (generally non-integer) correction is written in place.
function cov(X::SparseMatrixCSC; dims::Int=1, corrected::Bool=true)
    vardim = dims
    a, b = size(X)
    n, p = vardim == 1 ? (a, b) : (b, a)

    # The covariance can be decomposed into two terms
    # 1/(n - 1) ∑ (x_i - x̄)*(x_i - x̄)' = 1/(n - 1) (∑ x_i*x_i' - n*x̄*x̄')
    # which can be evaluated via a sparse matrix-matrix product

    # Compute ∑ x_i*x_i' using a sparse matrix-matrix product
    gram = unscaled_covzm(X, vardim)

    # Compute x̄
    x̄ᵀ = mean(X, dims=vardim)

    # Densify into a buffer wide enough to hold both the Gram entries and the mean
    # correction; for floating-point input this is the same type `Matrix(gram)` gives.
    T = promote_type(eltype(gram), eltype(x̄ᵀ))
    out = Matrix{T}(gram)

    # Subtract n*x̄*x̄' from the Gram matrix
    @inbounds for j in 1:p, i in 1:p
        out[i,j] -= x̄ᵀ[i] * x̄ᵀ[j]' * n
    end

    # scale with the sample size n or the corrected sample size n - 1
    return rmul!(out, inv(n - corrected))
end

# This is the function that does the reduction underlying var/std
#
# Overwrites `R` with the sums of `abs2(A[i, j] - mean)` over the dimensions of `A`
# that are reduced in `R` (those where `R` has size 1), and returns `R`.
#
#   R     - output array; its shape selects the reduction (1×1: whole matrix,
#           1×n: over rows, m×1: over columns, m×n: no reduction within the matrix).
#   A     - sparse input matrix; entries that are not stored count as zero, so each of
#           them contributes `abs2(mean)`.
#   means - centers to subtract; must have the same axes as `R`.
#
# Throws `DimensionMismatch` if the axes of `means` differ from those of `R`.
function centralize_sumabs2!(R::AbstractArray{S}, A::SparseMatrixCSC{Tv,Ti}, means::AbstractArray) where {S,Tv,Ti}
    require_one_based_indexing(R, A, means)
    # Called for its validation only (checks that `R` is a legal reduction target
    # for `A`); the returned value is not needed here.
    Base.check_reducedims(R, A)
    for i in 1:max(ndims(R), ndims(means))
        if axes(means, i) != axes(R, i)
            throw(DimensionMismatch("dimension $i of `mean` should have indices $(axes(R, i)), but got $(axes(means, i))"))
        end
    end
    isempty(R) || fill!(R, zero(S))
    isempty(A) && return R

    rowval = rowvals(A)
    nzval = nonzeros(A)
    m = size(A, 1)
    n = size(A, 2)

    if size(R, 1) == size(R, 2) == 1
        # Reduction along both columns and rows
        R[1, 1] = centralize_sumabs2(A, means[1])
    elseif size(R, 1) == 1
        # Reduction along rows
        @inbounds for col = 1:n
            mu = means[col]
            # Entries of this column that are not stored each contribute abs2(mu)
            r = convert(S, (m - length(nzrange(A, col)))*abs2(mu))
            @simd for j = nzrange(A, col)
                r += abs2(nzval[j] - mu)
            end
            R[1, col] = r
        end
    elseif size(R, 2) == 1
        # Reduction along columns
        # rownz[i] counts down from n to the number of entries of row i that are
        # not stored
        rownz = fill(convert(Ti, n), m)
        @inbounds for col = 1:n
            @simd for j = nzrange(A, col)
                row = rowval[j]
                R[row, 1] += abs2(nzval[j] - means[row])
                rownz[row] -= 1
            end
        end
        for i = 1:m
            R[i, 1] += rownz[i]*abs2(means[i])
        end
    else
        # Reduction along a dimension > 2
        # `R` has one entry per entry of `A`. No `@simd` here: the loop over stored
        # entries is not innermost and each iteration depends on `lastrow` from the
        # previous one, so the iterations are not independent.
        @inbounds for col = 1:n
            lastrow = 0
            for j = nzrange(A, col)
                row = rowval[j]
                # Rows between two stored entries hold zeros in A
                for i = lastrow+1:row-1
                    R[i, col] = abs2(means[i, col])
                end
                R[row, col] = abs2(nzval[j] - means[row, col])
                lastrow = row
            end
            # Rows after the last stored entry of the column
            for i = lastrow+1:m
                R[i, col] = abs2(means[i, col])
            end
        end
    end
    return R
end

end # module
Loading

0 comments on commit db3682b

Please sign in to comment.