Skip to content

Commit

Permalink
Merge pull request #17 from JuliaStats/anj/fixspearman
Browse files Browse the repository at this point in the history
Fix Spearman's rank correlation to address #2. Remove tiedrank method with dimension argument. Add Kendall's rank correlation. Enable tests.
  • Loading branch information
andreasnoack committed Aug 6, 2013
2 parents dd16c10 + 2877091 commit 32b8504
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/Stats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ module Stats
# corr
autocor,
cor_spearman,
cov_spearman,
cor_kendall,

# others
rle,
Expand Down
143 changes: 117 additions & 26 deletions src/corr.jl
Original file line number Diff line number Diff line change
@@ -1,50 +1,141 @@
# Various correlation

#
# spearman covariance functions
#

# spearman covariance between two vectors
function cov_spearman(x::AbstractVector, y::AbstractVector)
return cov(tiedrank(x), tiedrank(y))
end

# spearman covariance over all pairs of columns of two matrices
function cov_spearman(X::AbstractMatrix, Y::AbstractMatrix)
return [cov_spearman(X[:,i], Y[:,j]) for i = 1:size(X, 2), j = 1:size(Y,2)]
end
function cov_spearman(x::AbstractVector, Y::AbstractMatrix)
return [cov_spearman(x, Y[:,i]) for i = 1:size(Y, 2)]
end
function cov_spearman(X::AbstractMatrix, y::AbstractVector)
return [cov_spearman(X[:,i], y) for i = 1:size(X, 2)]
end
# spearman covariance over all pairs of columns of a matrix
cov_spearman(X::AbstractMatrix) = cov(tiedrank(X, 1))

#
# spearman correlation functions
#

# spearman correlation between two vectors
function cor_spearman(x::AbstractVector, y::AbstractVector)
if any(isnan(x)) || any(isnan(y)) return NaN end
return cor(tiedrank(x), tiedrank(y))
end

# spearman correlation over all pairs of columns of two matrices
function cor_spearman(X::AbstractMatrix, Y::AbstractMatrix)
return cor(tiedrank(X, 1), tiedrank(Y, 1))
return cor(mapslices(tiedrank, X, 1), mapslices(tiedrank, Y, 1))
end
function cor_spearman(X::AbstractMatrix, y::AbstractVector)
return cor(tiedrank(X, 1), tiedrank(y))
return cor(mapslices(tiedrank, X, 1), tiedrank(y))
end
function cor_spearman(x::AbstractVector, Y::AbstractMatrix)
return cor(tiedrank(x), tiedrank(Y, 1))
return cor(tiedrank(x), mapslices(tiedrank, Y, 1))
end

# spearman correlation over all pairs of columns of a matrix
function cor_spearman(X::AbstractMatrix)
return cor(tiedrank(X, 1))
csp = cor(mapslices(tiedrank, X, 1))
nanindex = vec(mapslices(any, isnan(X), 1))
csp[nanindex, :] = NaN
csp[:, nanindex] = NaN
return csp
end

#
# Kendall's rank correlation
#

# Knigh JASA (1966)
function cor_kendall!{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S})
if any(isnan(x)) || any(isnan(y)) return NaN end
n = length(x)
if n != length(y) error("Vectors must have same length") end

# Initial sorting
pm = sortperm(y)
x[:] = x[pm]
y[:] = y[pm]
pm[:] = sortperm(x)
x[:] = x[pm]

# Counting ties in x and y
iT = 1
nT = 0
iU = 1
nU = 0
for i = 2:n
if x[i] == x[i-1]
iT += 1
else
nT += iT*(iT - 1)
iT = 1
end
if y[i] == y[i-1]
iU += 1
else
nU += iU*(iU - 1)
iU = 1
end
end
if iT > 1 nT += iT*(iT - 1) end
nT = div(nT,2)
if iU > 1 nU += iU*(iU - 1) end
nU = div(nU,2)

# Sort y after x
y[:] = y[pm]

# Calculate double ties
iV = 1
nV = 0
jV = 1
for i = 2:n
if x[i] == x[i-1] && y[i] == y[i-1]
iV += 1
else
nV += iV*(iV - 1)
iV = 1
end
end
if iV > 1 nV += iV*(iV - 1) end
nV = div(nV,2)

nD = div(n*(n - 1),2)
return (nD - nT - nU + nV - 2swaps!(y))/sqrt((nD - nT)*(nD - nU))
end
cor_kendall(x::AbstractVector, y::AbstractVector) = cor_kendall!(copy(x), copy(y))
cor_kendall(x::AbstractVector, Y::AbstractMatrix) = [cor_kendall(x, Y[:,i]) for i in 1:size(Y, 2)]
cor_kendall(X::AbstractMatrix, y::AbstractVector) = [cor_kendall(X[:,i], y) for i in 1:size(X, 2)]
cor_kendall(X::AbstractMatrix, Y::AbstractMatrix) = [cor_kendall(X[:,i], Y[:,j]) for i in 1:size(X, 2), j in 1:size(Y, 2)]
function cor_kendall(X::AbstractMatrix)
n = size(X, 2)
C = eye(n)
for j = 2:n
for i = 1:j-1
C[i,j] = cor_kendall!(X[:,i],X[:,j])
C[j,i] = C[i,j]
end
end
return C
end

# Auxilliary functions for Kendall's rank correlation
function swaps!(x::AbstractVector)
n = length(x)
if n == 1 return 0 end
n2 = div(n, 2)
xl = sub(x, 1:n2)
xr = sub(x, n2+1:n)
nsl = swaps!(xl)
nsr = swaps!(xr)
sort!(xl)
sort!(xr)
return nsl + nsr + mswaps(xl,xr)
end

function mswaps(x::AbstractVector, y::AbstractVector)
i = 1
j = 1
nSwaps = 0
n = length(x)
while i <= n && j <= length(y)
if y[j] < x[i]
nSwaps += n - i + 1
j += 1
else
i += 1
end
end
return nSwaps
end

# autocorrelation for range
Expand Down
6 changes: 0 additions & 6 deletions src/scalar_stats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,6 @@ end

tiedrank{T<:Real}(X::AbstractMatrix{T}) = tiedrank(reshape(X, length(X)))

function tiedrank{T<:Real}(X::AbstractMatrix{T}, dim::Int)
retmat = apply(hcat, mapslices(tiedrank, X, 3 - dim))
return dim == 1 ? retmat : retmat'
end


# quantile and friends

quantile{T<:Real}(v::AbstractVector{T}) = quantile(v, [.0, .25, .5, .75, 1.0])
Expand Down
11 changes: 6 additions & 5 deletions test/01.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ values, lengths = rle(z)
@test lengths == [2, 2, 1, 1, 3, 1]
@test inverse_rle(values, lengths) == z

# X = [1 0; 2 1; 3 0; 4 1; 5 10]
# y = [5, 3, 4, 2, 5]
# @assert_approx_eq cov_spearman(X, y)[1] cov_spearman(X[:,1],y)
# @assert_approx_eq cov_spearman(X) cov_spearman(X, X)
# @assert_approx_eq cov_spearman(X, y) [-0.25, -0.1875]
X = [1 0; 2 1; 3 0; 4 1; 5 10]
y = [5, 3, 4, 2, 5]
@test_approx_eq cor_spearman(X, y)[1] cor_spearman(X[:,1],y)
@test_approx_eq cor_spearman(X) cor_spearman(X, X)
@test_approx_eq cor_spearman(X, y) [-0.102597835208515, -0.081110710565381]
@test_approx_eq cor_kendall(X,y) [-0.105409255338946, -0.117851130197758]

fnecdf = ecdf(randn(10000000))
@test_approx_eq_eps fnecdf([-1.96, -1.644854, -1.281552, -0.6744898, 0, 0.6744898, 1.281552, 1.644854, 1.96]) [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975] 1e-3
Expand Down

0 comments on commit 32b8504

Please sign in to comment.