diff --git a/docs/src/nonparametric.md b/docs/src/nonparametric.md index 048a0ba7..889b976b 100644 --- a/docs/src/nonparametric.md +++ b/docs/src/nonparametric.md @@ -81,3 +81,8 @@ ApproximatePermutationTest ```@docs FlignerKilleenTest ``` + +## Shapiro-Wilk test +```@docs +ShapiroWilkTest +``` diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index e3d12e2b..cc33ed8c 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -148,5 +148,6 @@ include("diebold_mariano.jl") include("clark_west.jl") include("white.jl") include("var_equality.jl") +include("shapiro_wilk.jl") end diff --git a/src/shapiro_wilk.jl b/src/shapiro_wilk.jl new file mode 100644 index 00000000..8ccb8f73 --- /dev/null +++ b/src/shapiro_wilk.jl @@ -0,0 +1,252 @@ +export ShapiroWilkTest + +#= +From: +PATRICK ROYSTON +Approximating the Shapiro-Wilk W-test for non-normality +*Statistics and Computing* (1992) **2**, 117-119 +DOI: [10.1007/BF01891203](https://doi.org/10.1007/BF01891203) +=# + +# Coefficients from Royston (1992) +for (s, c) in [(:C1, [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]), + (:C2, [0.0, 0.042981, -0.293762, -1.752461, 5.682633, -3.582633]), + (:C3, [0.5440, -0.39978, 0.025054, -0.0006714]), + (:C4, [1.3822, -0.77857, 0.062767, -0.0020322]), + (:C5, [-1.5861, -0.31082, -0.083751, 0.0038915]), + (:C6, [-0.4803, -0.082676, 0.0030302]), + (:C7, [0.164, 0.533]), + (:C8, [0.1736, 0.315]), + (:C9, [0.256, -0.00635]), + (:G, [-2.273, 0.459])] + @eval $(Symbol(:__RS92_, s))(x) = Base.Math.@horner(x, $(c...)) +end + +""" + HypothesisTests.shapiro_wilk_coefs(N::Integer) + +Construct a vector of de-correlated expected order statistics for Shapiro-Wilk +test for a sample of size `N`. + +If multiple tests on samples of size `N` are performed, it is beneficial to +construct and pass a single vector of coefficients to [`ShapiroWilkTest`](@ref). +""" +function shapiro_wilk_coefs(N::Integer) + if N < 3 + throw(ArgumentError("N must be greater than or equal to 3, got $N")) + elseif N == 3 # exact + w = sqrt(2.0) / 2.0 + return [w,zero(w),-w] + else + n = div(N, 2) + swc = Vector{Float64}(undef, N) + m = @view swc[1:n] # store only positive half of swc: + # it is (anti-)symmetric; hence '2' factor below + for i in eachindex(m) + # Weisberg&Bingham 1975 statistic + m[i] = -quantile(Normal(), (i - 3 / 8) / (N + 1 / 4)) + end + mᵀm = 2sum(abs2, m) + x = 1 / sqrt(N) + a₁ = m[1] / sqrt(mᵀm) + __RS92_C1(x) # aₙ = cₙ + (...) + if N ≤ 5 + ϕ = (mᵀm - 2m[1]^2) / (1 - 2a₁^2) + m .= m ./ sqrt(ϕ) # A, but reusing m to save allocs + m[1] = a₁ + else + a₂ = m[2] / sqrt(mᵀm) + __RS92_C2(x) # aₙ₋₁ = cₙ₋₁ + (...) + ϕ = (mᵀm - 2m[1]^2 - 2m[2]^2) / (1 - 2a₁^2 - 2a₂^2) + m .= m ./ sqrt(ϕ) # A, but reusing m to save allocs + m[1], m[2] = a₁, a₂ + end + + for i in 1:n + swc[N-i+1] = -swc[i] + end + if isodd(N) + swc[n+1] = zero(eltype(swc)) + end + return swc + end +end + +function unsafe_swstat(X::AbstractVector{<:Real}, A::AbstractVector{<:Real}) + _begin = firstindex(A) + AX = @inbounds dot(view(A, _begin:(_begin + length(X) - 1)), X) + m = mean(X) + S² = sum(x -> abs2(x - m), X) + W = AX^2 / S² + return clamp(W, 0, 1) # to guard against numeric errors +end + +struct ShapiroWilkTest <: HypothesisTest + coefs::Vector{Float64} # expectation of order statistics for Shapiro-Wilk test + W::Float64 # test statistic + censored::Int # only the smallest N₁ samples were used +end + +testname(::ShapiroWilkTest) = "Shapiro-Wilk normality test" +function population_param_of_interest(t::ShapiroWilkTest) + return ("Squared correlation of data and expected order statistics of N(0,1) (W)", + 1.0, t.W) +end +default_tail(::ShapiroWilkTest) = :left +censored_ratio(t::ShapiroWilkTest) = t.censored / length(t.coefs) + +function show_params(io::IO, t::ShapiroWilkTest, indent) + l = 24 + println(io, indent, rpad("number of observations:", l), length(t.coefs)) + println(io, indent, rpad("censored ratio:", l), censored_ratio(t)) + return println(io, indent, rpad("W-statistic:", l), t.W) +end + +function StatsAPI.pvalue(t::ShapiroWilkTest) + n = length(t.coefs) + W = t.W + + if iszero(censored_ratio(t)) + if n == 3 # exact by Shapiro&Wilk 1965 + # equivalent to 6/π * (asin(sqrt(W)) - asin(sqrt(3/4))) + return 1 - 6acos(sqrt(W)) / π + elseif n ≤ 11 # Royston 1992 + γ = __RS92_G(n) + w = -log(γ - log1p(-W)) + μ = __RS92_C3(n) + σ = exp(__RS92_C4(n)) + elseif 12 ≤ n # Royston 1992 + w = log1p(-W) + μ = __RS92_C5(log(n)) + σ = exp(__RS92_C6(log(n))) + end + return ccdf(Normal(μ, σ), w) + else + throw(ArgumentError("censored samples not implemented yet")) + # to implement censored samples follow Royston 1993 Section 3.3 + end +end + +""" + ShapiroWilkTest(X::AbstractVector{<:Real}, + swc::AbstractVector{<:Real}=shapiro_wilk_coefs(length(X)); + sorted::Bool=issorted(X), + censored::Integer=0) + +Perform a Shapiro-Wilk test of the null hypothesis that the data in vector `X` come from a +normal distribution. + +This implementation is based on the method by Royston (1992). +The calculation of the p-value is exact for sample size `N = 3`, and for ranges +`4 ≤ N ≤ 11` and `12 ≤ N ≤ 5000` (Royston 1992) two separate approximations +for p-values are used. + +# Keyword arguments +The following keyword arguments may be passed. +* `sorted::Bool=issorted(X)`: to indicate that sample `X` is already sorted. +* `censored::Integer=0`: to censor the largest samples from `X` + (so called upper-tail censoring) + +Implements: [`pvalue`](@ref) + +!!! warning + As noted by Royston (1993), (approximated) W-statistic will be accurate + but returned p-values may not be reliable if either of these apply: + * Sample size is large (`N > 2000`) or small (`N < 20`) + * Too much data is censored (`censored / N > 0.8`) + +# Implementation notes +* The current implementation DOES NOT implement p-values for censored data. +* If multiple Shapiro-Wilk tests are to be performed on samples of same + size, it is faster to construct `swc = shapiro_wilk_coefs(length(X))` once + and pass it to the test via `ShapiroWilkTest(X, swc)` for re-use. +* For maximal performance sorted `X` should be passed and indicated with + `sorted=true` keyword argument. + +# References +Shapiro, S. S., & Wilk, M. B. (1965). An Analysis of Variance Test for Normality +(Complete Samples). *Biometrika*, 52, 591–611. +[doi:10.1093/BIOMET/52.3-4.591](https://doi.org/10.1093/BIOMET/52.3-4.591). + +Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality. +*Statistics and Computing*, 2(3), 117–119. +[doi:10.1007/BF01891203](https://doi.org/10.1007/BF01891203) + +Royston, P. (1993). A Toolkit for Testing for Non-Normality in Complete and +Censored Samples. Journal of the Royal Statistical Society Series D +(The Statistician), 42(1), 37–43. +[doi:10.2307/2348109](https://doi.org/10.2307/2348109) + +Royston, P. (1995). Remark AS R94: A Remark on Algorithm AS 181: The W-test for +Normality. *Journal of the Royal Statistical Society Series C +(Applied Statistics)*, 44(4), 547–551. +[doi:10.2307/2986146](https://doi.org/10.2307/2986146). +""" +function ShapiroWilkTest(sample::AbstractVector{<:Real}, + swcoefs::AbstractVector{<:Real}=shapiro_wilk_coefs(length(sample)); + sorted::Bool=issorted(sample), + censored::Integer=0) + N = length(sample) + if N < 3 + throw(ArgumentError("at least 3 samples are required, got $N")) + elseif censored ≥ N + throw(ArgumentError("`censored` must be less than `length(sample)`, " * + "got `censored = $censored > $N = length(sample)`")) + elseif length(swcoefs) ≠ length(sample) + throw(DimensionMismatch("length of sample and Shapiro-Wilk coefficients " * + "differ, got $N and $(length(swcoefs))")) + end + + W = if !sorted + X = sort!(sample[1:(end - censored)]) + if abs(last(X) - first(X)) < length(X) * eps() + throw(ArgumentError("sample is constant (up to numerical accuracy)")) + end + unsafe_swstat(X, swcoefs) + else + X = @view sample[1:(end - censored)] + if last(X) - first(X) < length(X) * eps() + throw(ArgumentError("sample doesn't seem to be sorted or " * + "is constant (up to numerical accuracy)")) + end + unsafe_swstat(X, swcoefs) + end + + return ShapiroWilkTest(swcoefs, W, censored) +end + +#= +# To compare with the standard ALGORITHM AS R94 fortran subroutine +# * grab scipys (swilk.f)[https://raw.githubusercontent.com/scipy/scipy/main/scipy/stats/statlib/swilk.f]; +# * compile +# ``` +# gfortran -shared -fPIC -o swilk.so swilk.f +# gfortran -fdefault-integer-8 -fdefault-real-8 -shared -fPIC swilk.f -o swilk64.so +# ``` + +for (lib, I, F) in (("./swilk64.so", Int64, Float64), + ("./swilk.so" , Int32, Float32)) + @eval begin + function swilkfort!(X::AbstractVector{$F}, A::AbstractVector{$F}, computeA=true) + X = issorted(X) ? X : sort(X) + w, pval = Ref{$F}(0.0), Ref{$F}(0.0) + ifault = Ref{$I}(0) + + ccall((:swilk_, $lib), + Cvoid, + ( + Ref{Bool}, # INIT if false compute SWCoeffs in A, else use A + Ref{$F}, # X sample + Ref{$I}, # N samples length + Ref{$I}, # N1 (upper) uncensored data length + Ref{$I}, # N2 length of A + Ref{$F}, # A A + Ref{$F}, # W W-statistic + Ref{$F}, # PW p-value + Ref{$I}, # IFAULT error code (see swilk.f for meaning) + ), + !computeA, X, length(X), length(X), div(N,2), A, w, pval, ifault) + return (w[], pval[], ifault[], A) + end + swilkfort(X::Vector{$F}) = swilkfort!(X, zeros($F, div(length(X),2))) + end +end +=# diff --git a/test/runtests.jl b/test/runtests.jl index 18fff3db..5cbbfd76 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,6 +30,7 @@ include("mann_whitney.jl") include("correlation.jl") include("permutation.jl") include("power_divergence.jl") +include("shapiro_wilk.jl") include("show.jl") include("t.jl") include("wald_wolfowitz.jl") diff --git a/test/shapiro_wilk.jl b/test/shapiro_wilk.jl new file mode 100644 index 00000000..acfba784 --- /dev/null +++ b/test/shapiro_wilk.jl @@ -0,0 +1,130 @@ +using HypothesisTests, LinearAlgebra, Test, Random +using StableRNGs + +@testset "Shapiro-Wilk" begin + @testset "shapiro_wilk_coefs" begin + @test HypothesisTests.shapiro_wilk_coefs(3) == [sqrt(2.0) / 2.0, 0.0, -sqrt(2.0) / 2.0] + + swc = HypothesisTests.shapiro_wilk_coefs(10) + @test swc[4] == -swc[7] + @test swc[2] == -swc[9] + + swc = HypothesisTests.shapiro_wilk_coefs(11) + + @test swc[6] == 0.0 + @test swc[5] == -swc[7] + @test swc[3] == -swc[9] + @test swc[1] == -swc[11] + + #anti-symmetry + swc = HypothesisTests.shapiro_wilk_coefs(20) + @test all([swc[i] == -swc[end - i + 1] for i in eachindex(swc)]) + + # Values obtained by calling `_swilkfort` fortran subroutine directly. + + swc10 = HypothesisTests.shapiro_wilk_coefs(10) + res = swc10[1:5] .- [0.573715, 0.32897, 0.214349, 0.122791, 0.0400887] + @test norm(res, 1) ≈ 0.0 atol = length(swc10) * eps(Float32) + + swc20 = HypothesisTests.shapiro_wilk_coefs(20) + res = swc20[1:10] .- + [0.473371, 0.32174, 0.255663, 0.208297, 0.16864, 0.133584, 0.101474, + 0.0712893, 0.0423232, 0.0140351] + @test norm(res, 1) ≈ 0.0 atol = length(swc20) * eps(Float32) + + rng = StableRNG(0x5bca7c69b794f8ce) + X = sort(rand(rng, Float64, 10)) + # W, pval, _ = swilkfort(X) + W = 0.9088434774710951 + # pval = 0.2731410626084226 + @test HypothesisTests.unsafe_swstat(X, swc10) ≈ W atol = eps(Float32) + end + + @testset "Shapiro-Wilk" begin + # syntactic tests + + @test_throws ArgumentError ShapiroWilkTest([1, 2]) + @test_throws ArgumentError("at least 3 samples are required, got 2") ShapiroWilkTest([1, 2], HypothesisTests.shapiro_wilk_coefs(3)) + @test_throws ArgumentError ShapiroWilkTest([1, 2, 3], censored=4) + @test_throws DimensionMismatch ShapiroWilkTest([1, 2, 3], + HypothesisTests.shapiro_wilk_coefs(4)) + + @test_throws ArgumentError("sample doesn't seem to be sorted or is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1]) + @test_throws ArgumentError("sample is constant (up to numerical accuracy)") ShapiroWilkTest([1,1,1], sorted=false) + + t = ShapiroWilkTest([1, 2, 3]) + @test t.W == 1.0 + @test pvalue(t) == 1.0 + + @test_throws ArgumentError("censored samples not implemented yet") pvalue(ShapiroWilkTest(1:4, censored=1)) + + str = sprint(show, t) + @test str == + """Shapiro-Wilk normality test + --------------------------- + Population details: + parameter of interest: Squared correlation of data and expected order statistics of N(0,1) (W) + value under h_0: 1.0 + point estimate: 1.0 + + Test summary: + outcome with 95% confidence: fail to reject h_0 + one-sided p-value: 1.0000 + + Details: + number of observations: 3 + censored ratio: 0.0 + W-statistic: 1.0 + """ + + # testing different cases of N + for N in (3, 5, 11, 12) + rng = StableRNG(0x5bca7c69b794f8ce) + X = sort(randn(rng, N)) + t = ShapiroWilkTest(X; sorted=true) + + # analytic properties from Shapiro-Wilk 1965: + # Lemma 1: Scale and origin invariance: + scale, shift = rand(rng, 2) + @test t.W ≈ ShapiroWilkTest(X .+ shift).W + @test t.W ≈ ShapiroWilkTest(scale .* X).W + @test t.W ≈ ShapiroWilkTest(scale .* X .+ shift).W + # Lemma 2, 3: upper and lower bounds + @test N * t.coefs[1]^2 / (N - 1) ≤ t.W ≤ 1.0 + + # test the computation of pvalue in those cases + @test pvalue(t) ≥ 0.05 + end + + @testset "Worked Example" begin + # **Worked Example** (Section 4) from + # PATRICK ROYSTON Approximating the Shapiro-Wilk W-test for non-normality + # *Statistics and Computing* (1992) **2**, 117-119 + + X = [48.4, 49.0, 59.5, 59.6, 60.7, 88.8, 98.2, 109.4, 169.1, 227.1] + swc = HypothesisTests.shapiro_wilk_coefs(length(X)) + @test norm(swc[1:5] .- [0.5737, 0.3290, 0.2143, 0.1228, 0.0401], Inf) < 5.0e-5 + W = HypothesisTests.unsafe_swstat(X, swc) + @test W ≈ 0.8078 atol = 2.9e-5 + + t = ShapiroWilkTest(X) + @test t.W == W + @test pvalue(t) ≈ 0.018 atol = 4.7e-5 + @test pvalue(t) ≈ pvalue(ShapiroWilkTest(X, sorted=true)) + @test iszero(HypothesisTests.censored_ratio(t)) + @test length(t.coefs) == length(X) + + # test for un-sorted sample + X2 = X[[9,8,2,3,4,5,7,10,1,6]] + t2 = ShapiroWilkTest(X2) + @test_throws ArgumentError ShapiroWilkTest(X2, sorted=true) + @test t2.W ≈ t.W + @test pvalue(t2) ≈ pvalue(t) + X3 = X[[2,8,9,3,4,5,7,10,1,6]] + t3 = ShapiroWilkTest(X3) + @test t3.W ≈ t.W + @test pvalue(t3) ≈ pvalue(t) + @test pvalue(t3) ≠ pvalue(ShapiroWilkTest(X3, sorted=true)) + end + end +end