diff --git a/src/statmodels.jl b/src/statmodels.jl index 6054ddcd04bda1..9968b48a038f50 100644 --- a/src/statmodels.jl +++ b/src/statmodels.jl @@ -159,46 +159,61 @@ df_residual(obj::RegressionModel) = error("df_residual is not defined for $(type ## Nms are the coefficient names, corresponding to rows in the table type CoefTable - mat::Matrix + cols::Vector colnms::Vector rownms::Vector - pvalcol::Integer - function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,pvalcol::Int=0) - nr,nc = size(mat) - 0 <= pvalcol <= nc || error("pvalcol = $pvalcol should be in 0,...,$nc]") + function CoefTable(cols::Vector,colnms::Vector,rownms::Vector) + nc = length(cols) + nrs = map(length,cols) + nr = nrs[1] length(colnms) in [0,nc] || error("colnms should have length 0 or $nc") length(rownms) in [0,nr] || error("rownms should have length 0 or $nr") - new(mat,colnms,rownms,pvalcol) + all(nrs .== nr) || error("Elements of cols should have equal lengths, but got $nrs") + new(cols,colnms,rownms) + end + + function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,pvalcol::Int=0) + nc = size(mat,2) + cols = Any[mat[:, i] for i in 1:nc] + if pvalcol != 0 # format the p-values column + cols[pvalcol] = [PValue(cols[pvalcol][j]) + for j in eachindex(cols[pvalcol])] + end + CoefTable(cols,colnms,rownms) end end -## format numbers in the p-value column -function format_pvc(pv::Number) - if isnan(pv) - return @sprintf("%d", pv) +type PValue + v::Number + function PValue(v::Number) + 0. <= v <= 1. || isnan(v) || error("p-values must be in [0.,1.]") + new(v) end - 0. <= pv <= 1. || error("p-values must be in [0.,1.]") - if pv >= 1e-4 - return @sprintf("%.4f", pv) +end + +function show(io::IO, pv::PValue) + v = pv.v + if isnan(v) + @printf(io,"%d", v) + elseif v >= 1e-4 + @printf(io,"%.4f", v) else - return @sprintf("<1e%2.2d", ceil(Integer, max(nextfloat(log10(pv)), -99))) + @printf(io,"<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99))) end end function show(io::IO, ct::CoefTable) - mat = ct.mat; nr,nc = size(mat); rownms = ct.rownms; colnms = ct.colnms; pvc = ct.pvalcol + cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms; + nc = length(cols) + nr = length(cols[1]) if length(rownms) == 0 rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr] end rnwidth = max(4,maximum([length(nm) for nm in rownms]) + 1) rownms = [rpad(nm,rnwidth) for nm in rownms] widths = [length(cn)::Int for cn in colnms] - str = [sprint(showcompact,mat[i,j]) for i in 1:nr, j in 1:nc] - if pvc != 0 # format the p-values column - for i in 1:nr - str[i,pvc] = format_pvc(mat[i,pvc]) - end - end + str = ByteString[isa(cols[j][i], AbstractString) ? cols[j][i] : + sprint(showcompact,cols[j][i]) for i in 1:nr, j in 1:nc] for j in 1:nc for i in 1:nr lij = length(str[i,j]) diff --git a/test/statmodels.jl b/test/statmodels.jl index 78ae4bde5821aa..0c1260523fa186 100644 --- a/test/statmodels.jl +++ b/test/statmodels.jl @@ -1,10 +1,32 @@ using StatsBase using Base.Test -## format_pvc: Formatting of p-values -@test StatsBase.format_pvc(1.0) == "1.0000" -@test StatsBase.format_pvc(1e-1) == "0.1000" -@test StatsBase.format_pvc(1e-5) == "<1e-4" -@test StatsBase.format_pvc(NaN) == "NaN" -@test_throws ErrorException StatsBase.format_pvc(-0.1) -@test_throws ErrorException StatsBase.format_pvc(1.1) +srand(10) +v1 = rand(3) +v2 = ["Good", "Great", "Bad"] +v3 = rand(Int8, 3) +v4 = [StatsBase.PValue(rand()./10000) for i in 1:3] +m = rand(3,4) +@test sprint(show, CoefTable(Any[v1, v2, v3, v4], + ["Estimate", "Comments", "df", "p"], + ["x1", "x2", "x3"])) == """\ + Estimate Comments df p +x1 0.112582 Good 88 <1e-4 +x2 0.368314 Great -90 <1e-4 +x3 0.344454 Bad -80 <1e-4 +""" + +@test sprint(show, CoefTable(m, ["Estimate", "Stderr", "df", "p"], + ["x1", "x2", "x3"], 4)) == """\ + Estimate Stderr df p +x1 0.819778 0.844007 0.923676 0.1717 +x2 0.669931 0.67919 0.066098 0.4204 +x3 0.453058 0.72525 0.999172 0.5567 +""" + +@test sprint(show, StatsBase.PValue(1.0)) == "1.0000" +@test sprint(show, StatsBase.PValue(1e-1)) == "0.1000" +@test sprint(show, StatsBase.PValue(1e-5)) == "<1e-4" +@test sprint(show, StatsBase.PValue(NaN)) == "NaN" +@test_throws ErrorException StatsBase.PValue(-0.1) +@test_throws ErrorException StatsBase.PValue(1.1) diff --git a/test/statquiz.jl b/test/statquiz.jl index 4b4fcb269833a3..f4cd7e0bb8c17c 100644 --- a/test/statquiz.jl +++ b/test/statquiz.jl @@ -69,7 +69,7 @@ println("\nII F") print("Testing regression: ") ctable = coeftable(lm(big ~ x, nasty)) @test typeof(ctable) == CoefTable -@test_approx_eq ctable.mat[:,1] [99999990, 1] +@test_approx_eq ctable.cols[1] [99999990, 1] @test sprint(show, ctable) == """\ Estimate Std.Error t value Pr(>|t|)