diff --git a/README.md b/README.md index 02dd68eb5cf421..2efd2e8191df8f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ Basic statistics functions for Julia ## List of Functions -* `autocor(A, l)`: Compute the autocorrelation of an array `A` at lag(s) `l`. +* `acf(A, l)`: Compute the autocorrelation of an array `A` at lag(s) `l`. +* `ccf(A, B, l)`: Compute the crosscorrelation of array `A` and `B` at lag(s) `l`. * `cor_spearman(x, y)`: Compute Spearman's rank order correlation between `x` and `y`. * `cov_spearman(x, y)`: Compute Spearman's rank order covariance between `x` and `y`. * `decile(a)`: Compute the deciles of `a`. diff --git a/src/Stats.jl b/src/Stats.jl index af0d77ed4833f0..a5d0755a4772c5 100644 --- a/src/Stats.jl +++ b/src/Stats.jl @@ -30,7 +30,8 @@ module Stats wmean, # corr - autocor, + acf, + ccf, cor_spearman, cor_kendall, diff --git a/src/corr.jl b/src/corr.jl index 807ee540d755c9..2c5b35e4817479 100644 --- a/src/corr.jl +++ b/src/corr.jl @@ -138,25 +138,97 @@ function mswaps(x::AbstractVector, y::AbstractVector) return nSwaps end -# autocorrelation for range -function autocor(x::AbstractVector, lags::Ranges) - lx = length(x) - if max(lags) > lx error("Autocorrelation distance must be less than sample size") end - mx = mean(x) - sxinv = 1/stdm(x, mx) - xs = Array(typeof(sxinv), lx) - for i = 1:lx - xs[i] = (x[i] - mx)*sxinv - end - acf = Array(typeof(sxinv), length(lags)) - for i in 1:length(lags) - acf[i] = dot(xs[1:end - lags[i]], xs[lags[i] + 1:end])/(lx - 1) +# Autocorrelation for range +function acf{T<:Real}(x::AbstractVector{T}, lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x)))); + correlation::Bool=true, demean::Bool=true) + lx, llags = length(x), length(lags) + if max(lags) >= lx; error("Autocovariance distance must be less than sample size"); end + + xs = Array(T, lx) + demean ? (mx = mean(x); for i = 1:lx; xs[i] = x[i]-mx; end) : (for i = 1:lx; xs[i] = x[i]; end) + + autocov_sumterm = Array(T, llags) + for i in 1:llags + autocov_sumterm[i] = dot(xs[1:end-lags[i]], xs[lags[i]+1:end]) + end + autocov_sumterm + + if correlation + demean ? + (return autocov_sumterm/(min(lags)==0 ? autocov_sumterm[1] : dot(xs, xs))) : (return autocov_sumterm/dot(x, x)) + else + return autocov_sumterm/lx + end +end + +# Autocorrelation at a specific lag +acf{T<:Real}(x::AbstractVector{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) = + acf(x, lags:lags, correlation=correlation, demean=demean)[1] + +# Cross-correlation for range +function ccf{T<:Real}(x::AbstractVector{T}, y::AbstractVector{T}, + lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x)))); + correlation::Bool=true, demean::Bool=true) + lx, ly, llags = length(x), length(y), length(lags) + if lx != ly error("Input vectors must have same length") end + if max(lags) > lx; error("Cross-covariance distance must be less than sample size"); end + + xs, ys = Array(T, lx), Array(T, ly) + if demean + mx, my = mean(x), mean(y); for i = 1:lx; xs[i], ys[i] = x[i]-mx, y[i]-my; end + else + for i = 1:lx; xs[i], ys[i] = x[i], y[i]; end + end + + crosscov_sumterm = Array(T, llags) + for i in 1:llags + crosscov_sumterm[i] = dot(xs[1:end-lags[i]], ys[lags[i]+1:end]) + end + crosscov_sumterm + + if correlation + demean ? + (return crosscov_sumterm/(sqrt(dot(xs, xs)*dot(ys, ys)))) : (return crosscov_sumterm/sqrt(dot(x, x)*dot(y, y))) + else + return crosscov_sumterm/lx + end +end + +# Cross-correlation at a specific lag +ccf{T<:Real}(x::AbstractVector{T}, y::AbstractVector{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) = + ccf(x, y, lags:lags, correlation=correlation, demean=demean)[1] + +# Cross-correlation between all pairs of columns of a matrix for range +function ccf{T<:Real}(x::AbstractMatrix{T}, lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x)))); + correlation::Bool=true, demean::Bool=true) + ncols = size(x, 2) + + crosscorr = Array(T, length(lags), ncols, ncols) + for i = 1:ncols + for j = 1:ncols + crosscorr[:, i, j] = ccf(x[:, i], x[:, j], lags, correlation=correlation, demean=demean) end - return acf + end + crosscorr end -# autocorrelation at a specific lag -autocor(x::AbstractVector, lags::Real) = autocor(x, lags:lags)[1] +# Cross-correlation between all pairs of columns of a matrix at a specific lag +ccf{T<:Real}(x::AbstractMatrix{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) = + reshape(ccf(x, lags:lags, correlation=correlation, demean=demean), size(x, 2), size(x, 2)) + +# Unlike ccf, compute only autocorrelation (not cross-correlation) of matrix columns for range +function acf{T<:Real}(x::AbstractMatrix{T}, + lags::AbstractVector{Int}=0:min(length(x)-1, int(10log10(length(x)))); + correlation::Bool=true, demean::Bool=true) + ncols = size(x, 2) + + autocorr = Array(T, length(lags), ncols) + for i = 1:ncols + autocorr[:, i] = acf(x[:, i], lags, correlation=correlation, demean=demean) + end + autocorr +end -# autocorrelation at a default of zero to 10log10(length(v)) lags -autocor(v::AbstractVector) = autocor(v, 0:min(length(v) - 1, 10log10(length(v)))) +# Unlike ccf, compute only autocorrelation (not cross-correlation) of matrix columns at a specific lag +acf{T<:Real}(x::AbstractMatrix{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) = + acf(x, lags:lags, correlation=correlation, demean=demean) diff --git a/test/01.jl b/test/01.jl index fdcfab0e0bcef7..297c9484d6e0ea 100644 --- a/test/01.jl +++ b/test/01.jl @@ -1,7 +1,7 @@ using Stats using Base.Test -@test_approx_eq autocor([1, 2, 3, 4, 5], 1) 0.4 +@test_approx_eq acf([1, 2, 3, 4, 5], 1) 0.4 @test iqr([1, 2, 3, 4, 5]) == [2.0, 4.0] diff --git a/test/statquiz.jl b/test/statquiz.jl index 91ec3085149250..1ef9aec5a6eb4f 100644 --- a/test/statquiz.jl +++ b/test/statquiz.jl @@ -57,6 +57,118 @@ for i in 1:5 end println("OK") +print("Test autocorrelation: ") +# x was sampled by calling x = randn(10, 2) +x = [-2.133252557240862 -.7445937365828654; +.1775816414485478 -.5834801838041446; +-.6264517920318317 -.68444205333293; +-.8809042583216906 .9071671734302398; +.09251017186697393 -1.0404476733379926; +-.9271887119115569 -.620728578941385; +3.355819743178915 -.8325051361909978; +-.2834039258495755 -.22394811874731657; +.5354280026977677 .7481337671592626; +.39182285417742585 .3085762550821047] +# Set the number of printed digits in R to 20 by running the command options(digits=20) +# "racfx11" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE)$acf[, 1, 1] +racfx11 = [0.999999999999999888978, -0.221173011668873431557, 0.229321981664153962122, 0.019505581764945757045, + -0.139015765538446717242, 0.125681062460244019618, -0.427909344123907742219, 0.021699096507690283225, + -0.059889541590524189574, -0.048220059475281865091] +# "jacfx11" are computed by calling Julia's acf function on x +jacfx11 = acf(x[:, 1]) +for i =1:length(racfx11) +@test_approx_eq racfx11[i] jacfx11[i] +end +println("OK") + +print("Test cross-correlation: ") +# "racfx12" was computed by calling R's acf function on x: acf(x, plot=FALSE)$acf[, 2, 1] +racfx12 = [-0.0785530595168460604727, 0.1363402071384945957178, 0.5695846902378886023044, -0.1015722302688646383473, + 0.1601773904236522549915, -0.0251707633078918115166, 0.0079618741584954952351] +# "jacfx12" are computed by calling Julia's acf function on x +jacfx12 = ccf(x[:, 1], x[:, 2], 0:6) +for i =1:length(racfx12) +@test_approx_eq racfx12[i] jacfx12[i] +end +println("OK") + +print("Test autocorrelation vs cross-correlation: ") +@test acf(x[:, 1]) == ccf(x[:, 1], x[:, 1]) + +print("Test autocovariance: ") +# "racvx11" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE, type="covariance")$acf[, 1, 1] +racvx11 = [1.839214242630635709475, -0.406784553146903871124, 0.421772254824993531042, 0.035874943792884653182, + -0.255679775928512320604, 0.231154400105831353551, -0.787016960267425180753, 0.039909287349160660341, + -0.110149697877911914579, -0.088687020167434751916] +# "jacvx11" are computed by calling Julia's acf function on x +jacvx11 = acf(x[:, 1], correlation=false) +for i =1:length(racvx11) +@test_approx_eq racvx11[i] jacvx11[i] +end +println("OK") + +print("Test cross-covariance: ") +# "racvx12" was computed by calling R's acf function on x: acf(x, plot=FALSE, type="covariance")$acf[, 2, 1] +racvx12 = [-0.0697521748336961816550, 0.1210649976420989648584, 0.5057698724968141545943, -0.0901923363334168753935, + 0.1422315236345411126884, -0.0223506951065748533936, 0.0070698460597599819752] +# "jacfx12" are computed by calling Julia's acf function on x +jacvx12 = ccf(x[:, 1], x[:, 2], 0:6, correlation=false) +for i =1:length(racvx12) +@test_approx_eq racvx12[i] jacvx12[i] +end +println("OK") + +print("Test autocorrelation with demean=false: ") +# "racfx11nodemean" was computed by calling R's acf function on x: acf(x[, 1], plot=FALSE, demean=FALSE)$acf[, 1, 1] +racfx11nodemean = [1.000000000000000000000, -0.223355812053329189082, 0.228124838820096320635, 0.016984315796356772021, + -0.137403649655494369819, 0.125861757190160739039, -0.428765059298546857836, 0.024683201099522964622, + -0.058291459424761618568, -0.045424485824113770838] +# "jacfx11nodemean" are computed by calling Julia's acf function on x +jacfx11nodemean = acf(x[:, 1], demean=false) +for i =1:length(racfx11nodemean) +@test_approx_eq racfx11nodemean[i] jacfx11nodemean[i] +end +println("OK") + +print("Test cross-correlation with demean=false: ") +# "racfx12nodemean" was computed by calling R's acf function on x: acf(x, plot=FALSE, demean=FALSE)$acf[, 2, 1] +racfx12nodemean = [-0.063791841616291797279, 0.143906622654901311664, 0.557311279399980707971, -0.070174737610974355362, + 0.270818343664623484290, 0.071161936583426677050, 0.103265547537476284901] +# "jacfx12" are computed by calling Julia's acf function on x +jacfx12nodemean = ccf(x[:, 1], x[:, 2], 0:6, demean=false) +for i =1:length(racfx12nodemean) +@test_approx_eq racfx12nodemean[i] jacfx12nodemean[i] +end +println("OK") + +print("Test autocorrelation vs cross-correlation with demean=false: ") +@test acf(x[:, 1], demean=false) == ccf(x[:, 1], x[:, 1], demean=false) + +print("Test autocovariance with demean=false: ") +# "racvx11" was computed by calling R's acf function on x: +# acf(x[, 1], plot=FALSE, type="covariance", demean=FALSE)$acf[, 1, 1] +racvx11nodemean = [1.840102514084350771029, -0.410997591294682773633, 0.419773089437946556046, 0.031252882196878647991, + -0.252836801175440550882, 0.231598535832688912084, -0.788971663566781833410, 0.045419620398881817291, + -0.107262261037149780885, -0.083585710565940704586] +# "jacvx11" are computed by calling Julia's acf function on x +jacvx11nodemean = acf(x[:, 1], correlation=false, demean=false) +for i =1:length(racvx11nodemean) +@test_approx_eq racvx11nodemean[i] jacvx11nodemean[i] +end +println("OK") + +print("Test cross-covariance with demean=false: ") +# "racvx12" was computed by calling R's acf function on x: +# acf(x, plot=FALSE, type="covariance", demean=FALSE)$acf[, 2, 1] +racvx12nodemean = [-0.061507621146693322589, 0.138753699571784711031, 0.537355407299582976677, -0.067661962183297230666, + 0.261121040867466125412, 0.068613812119833542114, 0.099567875993390383971] +# "jacfx12" are computed by calling Julia's acf function on x +jacvx12nodemean = ccf(x[:, 1], x[:, 2], 0:6, correlation=false, demean=false) +for i =1:length(racvx12nodemean) +@test_approx_eq racvx12nodemean[i] jacvx12nodemean[i] +end +println("OK") + print("Test spearman correlation: ") cn = colnames(nasty)[[2,5:9]] for i in 1:5