Skip to content

Commit

Permalink
Merge pull request JuliaLang#27 from scidom/master
Browse files Browse the repository at this point in the history
Added auto- and cross- -correlation and -covariance functions
  • Loading branch information
andreasnoack committed Oct 4, 2013
2 parents cb60342 + 28be5af commit 47c198b
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 21 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Basic statistics functions for Julia

## List of Functions

* `autocor(A, l)`: Compute the autocorrelation of an array `A` at lag(s) `l`.
* `acf(A, l)`: Compute the autocorrelation of an array `A` at lag(s) `l`.
* `ccf(A, B, l)`: Compute the crosscorrelation of array `A` and `B` at lag(s) `l`.
* `cor_spearman(x, y)`: Compute Spearman's rank order correlation between `x` and `y`.
* `cov_spearman(x, y)`: Compute Spearman's rank order covariance between `x` and `y`.
* `decile(a)`: Compute the deciles of `a`.
Expand Down
3 changes: 2 additions & 1 deletion src/Stats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ module Stats
wmean,

# corr
autocor,
acf,
ccf,
cor_spearman,
cor_kendall,

Expand Down
108 changes: 90 additions & 18 deletions src/corr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,97 @@ function mswaps(x::AbstractVector, y::AbstractVector)
return nSwaps
end

# autocorrelation for range
function autocor(x::AbstractVector, lags::Ranges)
lx = length(x)
if max(lags) > lx error("Autocorrelation distance must be less than sample size") end
mx = mean(x)
sxinv = 1/stdm(x, mx)
xs = Array(typeof(sxinv), lx)
for i = 1:lx
xs[i] = (x[i] - mx)*sxinv
end
acf = Array(typeof(sxinv), length(lags))
for i in 1:length(lags)
acf[i] = dot(xs[1:end - lags[i]], xs[lags[i] + 1:end])/(lx - 1)
# Autocorrelation for range
function acf{T<:Real}(x::AbstractVector{T}, lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x))));
correlation::Bool=true, demean::Bool=true)
lx, llags = length(x), length(lags)
if max(lags) >= lx; error("Autocovariance distance must be less than sample size"); end

xs = Array(T, lx)
demean ? (mx = mean(x); for i = 1:lx; xs[i] = x[i]-mx; end) : (for i = 1:lx; xs[i] = x[i]; end)

autocov_sumterm = Array(T, llags)
for i in 1:llags
autocov_sumterm[i] = dot(xs[1:end-lags[i]], xs[lags[i]+1:end])
end
autocov_sumterm

if correlation
demean ?
(return autocov_sumterm/(min(lags)==0 ? autocov_sumterm[1] : dot(xs, xs))) : (return autocov_sumterm/dot(x, x))
else
return autocov_sumterm/lx
end
end

# Autocorrelation at a specific lag
acf{T<:Real}(x::AbstractVector{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) =
acf(x, lags:lags, correlation=correlation, demean=demean)[1]

# Cross-correlation for range
function ccf{T<:Real}(x::AbstractVector{T}, y::AbstractVector{T},
lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x))));
correlation::Bool=true, demean::Bool=true)
lx, ly, llags = length(x), length(y), length(lags)
if lx != ly error("Input vectors must have same length") end
if max(lags) > lx; error("Cross-covariance distance must be less than sample size"); end

xs, ys = Array(T, lx), Array(T, ly)
if demean
mx, my = mean(x), mean(y); for i = 1:lx; xs[i], ys[i] = x[i]-mx, y[i]-my; end
else
for i = 1:lx; xs[i], ys[i] = x[i], y[i]; end
end

crosscov_sumterm = Array(T, llags)
for i in 1:llags
crosscov_sumterm[i] = dot(xs[1:end-lags[i]], ys[lags[i]+1:end])
end
crosscov_sumterm

if correlation
demean ?
(return crosscov_sumterm/(sqrt(dot(xs, xs)*dot(ys, ys)))) : (return crosscov_sumterm/sqrt(dot(x, x)*dot(y, y)))
else
return crosscov_sumterm/lx
end
end

# Cross-correlation at a specific lag
ccf{T<:Real}(x::AbstractVector{T}, y::AbstractVector{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) =
ccf(x, y, lags:lags, correlation=correlation, demean=demean)[1]

# Cross-correlation between all pairs of columns of a matrix for range
function ccf{T<:Real}(x::AbstractMatrix{T}, lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x))));
correlation::Bool=true, demean::Bool=true)
ncols = size(x, 2)

crosscorr = Array(T, length(lags), ncols, ncols)
for i = 1:ncols
for j = 1:ncols
crosscorr[:, i, j] = ccf(x[:, i], x[:, j], lags, correlation=correlation, demean=demean)
end
return acf
end
crosscorr
end

# autocorrelation at a specific lag
autocor(x::AbstractVector, lags::Real) = autocor(x, lags:lags)[1]
# Cross-correlation between all pairs of columns of a matrix at a specific lag
ccf{T<:Real}(x::AbstractMatrix{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) =
reshape(ccf(x, lags:lags, correlation=correlation, demean=demean), size(x, 2), size(x, 2))

# Unlike ccf, compute only autocorrelation (not cross-correlation) of matrix columns for range
function acf{T<:Real}(x::AbstractMatrix{T},
lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x))));
correlation::Bool=true, demean::Bool=true)
ncols = size(x, 2)

autocorr = Array(T, length(lags), ncols)
for i = 1:ncols
autocorr[:, i] = acf(x[:, i], lags, correlation=correlation, demean=demean)
end
autocorr
end

# autocorrelation at a default of zero to 10log10(length(v)) lags
autocor(v::AbstractVector) = autocor(v, 0:min(length(v) - 1, 10log10(length(v))))
# Unlike ccf, compute only autocorrelation (not cross-correlation) of matrix columns at a specific lag
acf{T<:Real}(x::AbstractMatrix{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) =
acf(x, lags:lags, correlation=correlation, demean=demean)
2 changes: 1 addition & 1 deletion test/01.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using Stats
using Base.Test

@test_approx_eq autocor([1, 2, 3, 4, 5], 1) 0.4
@test_approx_eq acf([1, 2, 3, 4, 5], 1) 0.4

@test iqr([1, 2, 3, 4, 5]) == [2.0, 4.0]

Expand Down
112 changes: 112 additions & 0 deletions test/statquiz.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,118 @@ for i in 1:5
end
println("OK")

print("Test autocorrelation: ")
# x was sampled by calling x = randn(10, 2)
x = [-2.133252557240862 -.7445937365828654;
.1775816414485478 -.5834801838041446;
-.6264517920318317 -.68444205333293;
-.8809042583216906 .9071671734302398;
.09251017186697393 -1.0404476733379926;
-.9271887119115569 -.620728578941385;
3.355819743178915 -.8325051361909978;
-.2834039258495755 -.22394811874731657;
.5354280026977677 .7481337671592626;
.39182285417742585 .3085762550821047]
# Set the number of printed digits in R to 20 by running the command options(digits=20)
# "racfx11" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE)$acf[, 1, 1]
racfx11 = [0.999999999999999888978, -0.221173011668873431557, 0.229321981664153962122, 0.019505581764945757045,
-0.139015765538446717242, 0.125681062460244019618, -0.427909344123907742219, 0.021699096507690283225,
-0.059889541590524189574, -0.048220059475281865091]
# "jacfx11" are computed by calling Julia's acf function on x
jacfx11 = acf(x[:, 1])
for i =1:length(racfx11)
@test_approx_eq racfx11[i] jacfx11[i]
end
println("OK")

print("Test cross-correlation: ")
# "racfx12" was computed by calling R's acf function on x: acf(x, plot=FALSE)$acf[, 2, 1]
racfx12 = [-0.0785530595168460604727, 0.1363402071384945957178, 0.5695846902378886023044, -0.1015722302688646383473,
0.1601773904236522549915, -0.0251707633078918115166, 0.0079618741584954952351]
# "jacfx12" are computed by calling Julia's acf function on x
jacfx12 = ccf(x[:, 1], x[:, 2], 0:6)
for i =1:length(racfx12)
@test_approx_eq racfx12[i] jacfx12[i]
end
println("OK")

print("Test autocorrelation vs cross-correlation: ")
@test acf(x[:, 1]) == ccf(x[:, 1], x[:, 1])

print("Test autocovariance: ")
# "racvx11" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE, type="covariance")$acf[, 1, 1]
racvx11 = [1.839214242630635709475, -0.406784553146903871124, 0.421772254824993531042, 0.035874943792884653182,
-0.255679775928512320604, 0.231154400105831353551, -0.787016960267425180753, 0.039909287349160660341,
-0.110149697877911914579, -0.088687020167434751916]
# "jacvx11" are computed by calling Julia's acf function on x
jacvx11 = acf(x[:, 1], correlation=false)
for i =1:length(racvx11)
@test_approx_eq racvx11[i] jacvx11[i]
end
println("OK")

print("Test cross-covariance: ")
# "racvx12" was computed by calling R's acf function on x: acf(x, plot=FALSE, type="covariance")$acf[, 2, 1]
racvx12 = [-0.0697521748336961816550, 0.1210649976420989648584, 0.5057698724968141545943, -0.0901923363334168753935,
0.1422315236345411126884, -0.0223506951065748533936, 0.0070698460597599819752]
# "jacfx12" are computed by calling Julia's acf function on x
jacvx12 = ccf(x[:, 1], x[:, 2], 0:6, correlation=false)
for i =1:length(racvx12)
@test_approx_eq racvx12[i] jacvx12[i]
end
println("OK")

print("Test autocorrelation with demean=false: ")
# "racfx11nodemean" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE, demean=FALSE)$acf[, 1, 1]
racfx11nodemean = [1.000000000000000000000, -0.223355812053329189082, 0.228124838820096320635, 0.016984315796356772021,
-0.137403649655494369819, 0.125861757190160739039, -0.428765059298546857836, 0.024683201099522964622,
-0.058291459424761618568, -0.045424485824113770838]
# "jacfx11nodemean" are computed by calling Julia's acf function on x
jacfx11nodemean = acf(x[:, 1], demean=false)
for i =1:length(racfx11nodemean)
@test_approx_eq racfx11nodemean[i] jacfx11nodemean[i]
end
println("OK")

print("Test cross-correlation with demean=false: ")
# "racfx12nodemean" was computed by calling R's acf function on x: acf(x, plot=FALSE, demean=FALSE)$acf[, 2, 1]
racfx12nodemean = [-0.063791841616291797279, 0.143906622654901311664, 0.557311279399980707971, -0.070174737610974355362,
0.270818343664623484290, 0.071161936583426677050, 0.103265547537476284901]
# "jacfx12" are computed by calling Julia's acf function on x
jacfx12nodemean = ccf(x[:, 1], x[:, 2], 0:6, demean=false)
for i =1:length(racfx12nodemean)
@test_approx_eq racfx12nodemean[i] jacfx12nodemean[i]
end
println("OK")

print("Test autocorrelation vs cross-correlation with demean=false: ")
@test acf(x[:, 1], demean=false) == ccf(x[:, 1], x[:, 1], demean=false)

print("Test autocovariance with demean=false: ")
# "racvx11" was computed by calling R's acf function on x:
# acf(x[, 1], plot=FALSE, type="covariance", demean=FALSE)$acf[, 1, 1]
racvx11nodemean = [1.840102514084350771029, -0.410997591294682773633, 0.419773089437946556046, 0.031252882196878647991,
-0.252836801175440550882, 0.231598535832688912084, -0.788971663566781833410, 0.045419620398881817291,
-0.107262261037149780885, -0.083585710565940704586]
# "jacvx11" are computed by calling Julia's acf function on x
jacvx11nodemean = acf(x[:, 1], correlation=false, demean=false)
for i =1:length(racvx11nodemean)
@test_approx_eq racvx11nodemean[i] jacvx11nodemean[i]
end
println("OK")

print("Test cross-covariance with demean=false: ")
# "racvx12" was computed by calling R's acf function on x:
# acf(x, plot=FALSE, type="covariance", demean=FALSE)$acf[, 2, 1]
racvx12nodemean = [-0.061507621146693322589, 0.138753699571784711031, 0.537355407299582976677, -0.067661962183297230666,
0.261121040867466125412, 0.068613812119833542114, 0.099567875993390383971]
# "jacfx12" are computed by calling Julia's acf function on x
jacvx12nodemean = ccf(x[:, 1], x[:, 2], 0:6, correlation=false, demean=false)
for i =1:length(racvx12nodemean)
@test_approx_eq racvx12nodemean[i] jacvx12nodemean[i]
end
println("OK")

print("Test spearman correlation: ")
cn = colnames(nasty)[[2,5:9]]
for i in 1:5
Expand Down

0 comments on commit 47c198b

Please sign in to comment.