Skip to content

Commit

Permalink
Merge pull request JuliaLang#156 from panlanfeng/anyCoefTable
Browse files Browse the repository at this point in the history
Allow Any type in CoefTable
  • Loading branch information
nalimilan committed May 18, 2016
2 parents 4687603 + 16143d9 commit a67c370
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 29 deletions.
57 changes: 36 additions & 21 deletions src/statmodels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,46 +159,61 @@ df_residual(obj::RegressionModel) = error("df_residual is not defined for $(type

## Table of coefficient estimates and related statistics, stored column by column.
## `cols` holds one vector per column, so columns may have different element types.
## `colnms` are the column headers; `rownms` are the coefficient names,
## corresponding to rows in the table.  Either name vector may be left empty.
type CoefTable
    cols::Vector
    colnms::Vector
    rownms::Vector

    ## Build a table from a vector of columns.
    ## Raises an error when `colnms`/`rownms` are neither empty nor of the
    ## expected length, or when the columns do not all have the same length.
    function CoefTable(cols::Vector,colnms::Vector,rownms::Vector)
        nc = length(cols)
        nrs = map(length,cols)
        nr = nrs[1]   # the first column fixes the row count, so `cols` must be non-empty
        length(colnms) in [0,nc] || error("colnms should have length 0 or $nc")
        length(rownms) in [0,nr] || error("rownms should have length 0 or $nr")
        all(nrs .== nr) || error("Elements of cols should have equal lengths, but got $nrs")
        new(cols,colnms,rownms)
    end

    ## Build a table from a matrix, one table column per matrix column.
    ## `pvalcol` is the index of the column holding p-values (0 for none); the
    ## entries of that column are wrapped in `PValue` so they print as p-values.
    function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,pvalcol::Int=0)
        nc = size(mat,2)
        # Reject an out-of-range column up front with a descriptive message
        # rather than failing later on the indexing below.
        0 <= pvalcol <= nc || error("pvalcol = $pvalcol should be in 0,...,$nc")
        cols = Any[mat[:, i] for i in 1:nc]
        if pvalcol != 0 # format the p-values column
            cols[pvalcol] = [PValue(p) for p in cols[pvalcol]]
        end
        CoefTable(cols,colnms,rownms)
    end
end

## A p-value wrapped in its own type, so that `show` can print it in the
## compact form used for the p-value column of a coefficient table.
type PValue
    v::Number
    ## Accept values in [0, 1] as well as NaN; anything else raises an error.
    function PValue(v::Number)
        0. <= v <= 1. || isnan(v) || error("p-values must be in [0.,1.]")
        new(v)
    end
end

## Print a p-value to `io`: values of at least 1e-4 with four decimals, and
## smaller values as an upper bound of the form "<1e-XX" with the exponent
## capped at -99.  NaN goes through the "%d" format.
function show(io::IO, pv::PValue)
    v = pv.v
    if isnan(v)
        @printf(io,"%d", v)
    elseif v >= 1e-4
        @printf(io,"%.4f", v)
    else
        # Write to `io` like the other branches; returning a formatted string
        # here would leave the stream empty for small p-values.
        @printf(io,"<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99)))
    end
end

## String form of a p-value, kept for callers of the earlier string-returning
## helper.  Validation and formatting are those of `PValue` and its `show` method.
format_pvc(pv::Number) = sprint(show, PValue(pv))

function show(io::IO, ct::CoefTable)
mat = ct.mat; nr,nc = size(mat); rownms = ct.rownms; colnms = ct.colnms; pvc = ct.pvalcol
cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms;
nc = length(cols)
nr = length(cols[1])
if length(rownms) == 0
rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr]
end
rnwidth = max(4,maximum([length(nm) for nm in rownms]) + 1)
rownms = [rpad(nm,rnwidth) for nm in rownms]
widths = [length(cn)::Int for cn in colnms]
str = [sprint(showcompact,mat[i,j]) for i in 1:nr, j in 1:nc]
if pvc != 0 # format the p-values column
for i in 1:nr
str[i,pvc] = format_pvc(mat[i,pvc])
end
end
str = ByteString[isa(cols[j][i], AbstractString) ? cols[j][i] :
sprint(showcompact,cols[j][i]) for i in 1:nr, j in 1:nc]
for j in 1:nc
for i in 1:nr
lij = length(str[i,j])
Expand Down
36 changes: 29 additions & 7 deletions test/statmodels.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
using StatsBase
using Base.Test

## format_pvc: Formatting of p-values as strings, and rejection of values
## outside [0, 1]
@test StatsBase.format_pvc(1.0) == "1.0000"
@test StatsBase.format_pvc(1e-1) == "0.1000"
@test StatsBase.format_pvc(1e-5) == "<1e-4"
@test StatsBase.format_pvc(NaN) == "NaN"
@test_throws ErrorException StatsBase.format_pvc(-0.1)
@test_throws ErrorException StatsBase.format_pvc(1.1)
## Seed the RNG so the random inputs below are reproducible; the expected
## tables depend on the order of the rand calls that follow.
srand(10)
v1 = rand(3)
v2 = ["Good", "Great", "Bad"]
v3 = rand(Int8, 3)
## p-values scaled down by 1e4, i.e. all below 1e-4
v4 = [StatsBase.PValue(rand()./10000) for i in 1:3]
m = rand(3,4)
## CoefTable built from a vector of columns with mixed element types
## (floats, strings, Int8 and PValue)
@test sprint(show, CoefTable(Any[v1, v2, v3, v4],
["Estimate", "Comments", "df", "p"],
["x1", "x2", "x3"])) == """\
Estimate Comments df p
x1 0.112582 Good 88 <1e-4
x2 0.368314 Great -90 <1e-4
x3 0.344454 Bad -80 <1e-4
"""

## CoefTable built from a matrix, with column 4 passed as the p-value column
@test sprint(show, CoefTable(m, ["Estimate", "Stderr", "df", "p"],
["x1", "x2", "x3"], 4)) == """\
Estimate Stderr df p
x1 0.819778 0.844007 0.923676 0.1717
x2 0.669931 0.67919 0.066098 0.4204
x3 0.453058 0.72525 0.999172 0.5567
"""

## PValue: printed form, and rejection of values outside [0, 1]
@test sprint(show, StatsBase.PValue(1.0)) == "1.0000"
@test sprint(show, StatsBase.PValue(1e-1)) == "0.1000"
@test sprint(show, StatsBase.PValue(1e-5)) == "<1e-4"
@test sprint(show, StatsBase.PValue(NaN)) == "NaN"
@test_throws ErrorException StatsBase.PValue(-0.1)
@test_throws ErrorException StatsBase.PValue(1.1)
2 changes: 1 addition & 1 deletion test/statquiz.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ println("\nII F")
print("Testing regression: ")
ctable = coeftable(lm(big ~ x, nasty))
@test typeof(ctable) == CoefTable
@test_approx_eq ctable.mat[:,1] [99999990, 1]
@test_approx_eq ctable.cols[1] [99999990, 1]

@test sprint(show, ctable) == """\
Estimate Std.Error t value Pr(>|t|)
Expand Down

0 comments on commit a67c370

Please sign in to comment.