Skip to content

Commit

Permalink
termnames
Browse files Browse the repository at this point in the history
  • Loading branch information
palday committed Sep 2, 2023
1 parent 4a7d159 commit a75b20b
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "StatsModels"
uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
version = "0.7.2"
version = "0.7.3"

[deps]
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Expand Down
1 change: 1 addition & 0 deletions src/StatsModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export
coefnames,
setcontrasts!,
formula,
termnames,

AbstractTerm,
ConstantTerm,
Expand Down
12 changes: 6 additions & 6 deletions src/contrasts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ mutable struct MyCoding <: AbstractContrasts
end
contrasts_matrix(C::MyCoding, baseind, n) = ...
termnames(C::MyCoding, levels, baseind) = ...
_termnames(C::MyCoding, levels, baseind) = ...
```
# References
Expand Down Expand Up @@ -198,7 +198,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
"$c_levels."))
end

tnames = termnames(contrasts, c_levels, baseind)
tnames = _termnames(contrasts, c_levels, baseind)

mat = contrasts_matrix(contrasts, baseind, n)

Expand All @@ -224,7 +224,7 @@ function ContrastsMatrix(c::ContrastsMatrix, levels::AbstractVector)
return c
end

function termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer)
function _termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer)
not_base = [1:(baseind-1); (baseind+1):length(levels)]
levels[not_base]
end
Expand All @@ -233,7 +233,7 @@ Base.getindex(contrasts::ContrastsMatrix, rowinds, colinds) =
getindex(contrasts.matrix, getindex.(Ref(contrasts.invindex), rowinds), colinds)

# Making a contrast type T only requires that there be a method for
# contrasts_matrix(T, baseind, n) and optionally termnames(T, levels, baseind)
# contrasts_matrix(T, baseind, n) and optionally _termnames(T, levels, baseind)
# The rest is boilerplate.
for contrastType in [:DummyCoding, :EffectsCoding, :HelmertCoding]
@eval begin
Expand Down Expand Up @@ -462,7 +462,7 @@ function contrasts_matrix(C::SeqDiffCoding, _, n)
end

# TODO: consider customizing term names:
# termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
# _termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
# ["$(levels[i])-$(levels[i-1])" for i in 2:length(levels)]

"""
Expand Down Expand Up @@ -591,7 +591,7 @@ function contrasts_matrix(C::HypothesisCoding, baseind, n)
C.contrasts
end

termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) =
_termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) =
something(C.labels, levels[1:length(levels) .!= baseind])

DataAPI.levels(c::HypothesisCoding) = c.levels
Expand Down
41 changes: 40 additions & 1 deletion src/statsmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,51 @@ function formula end
formula(m::TableStatisticalModel) = m.mf.f
formula(m::TableRegressionModel) = m.mf.f

"""
termnames(model::StatisticalModel)
termnames(term::AbstractTerm)
Return the names of the terms in a model's formula.
For models with only continuous predictors, this is the same as
`(responsename(model), coefnames(model))`.
For models with categorical predictors, the returned names reflect
the categorical predictor and not the coefficients resulting from
the choice of contrast coding.
```jldoctest
julia> termnames(@formula(y ~ 1 + log(x) * log(y) + (1+x|g)))
("y", ["1", "log(x)", "log(y)", "log(x) & log(y)", "(1 + x) | g"])
```
"""
termnames(model::StatisticalModel) = termnames(formula(model))

"""
termnames(term::AbstractTerm)
Return the name(s) of the term(s) represented by `term`, rather than the names of
the column(s) it generates. Return value is either a `String` or an iterable of
`String`s, or `nothing` for an omitted intercept.
"""
termnames(t::FormulaTerm) = (termnames(t.lhs), termnames(t.rhs))
# An omitted intercept (`InterceptTerm{false}`) has no name.
termnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : nothing
# Terms backed by a single symbol are named after that symbol. A categorical
# term yields exactly one name, independent of its contrast coding.
termnames(t::ContinuousTerm) = string(t.sym)
termnames(t::CategoricalTerm) = string(t.sym)
termnames(t::Term) = string(t.sym)
termnames(t::ConstantTerm) = string(t.n)
# Function terms are named by the stringified `exorig` expression.
termnames(t::FunctionTerm) = string(t.exorig)
# `init=String[]` guarantees that a collection of terms always produces a
# vector: a one-element tuple gives `["a"]` rather than a bare `"a"`, and an
# empty collection gives `String[]` instead of throwing.
termnames(ts::TupleTerm) = mapreduce(termnames, vcat, ts; init=String[])
termnames(t::MatrixTerm) = mapreduce(termnames, vcat, t.terms; init=String[])
# The names of the component terms are joined with " & ".
termnames(t::InteractionTerm) =
    kron_insideout((args...) -> join(args, " & "), vectorize.(termnames.(t.terms))...)

@doc """
fit(Mod::Type{<:StatisticalModel}, f::FormulaTerm, data, args...;
contrasts::Dict{Symbol}, kwargs...)
Convert tabular data into a numeric response vector and predictor matrix using
the formula `f`, and then `fit` the specified model type, wrapping the result in
the formula `f`, and then `fit` the specified model type, wrapping the result in
a [`TableRegressionModel`](@ref) or [`TableStatisticalModel`](@ref) (as
appropriate).
Expand Down
9 changes: 9 additions & 0 deletions test/statsmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
## test copying of names from Terms to CoefTable
ct = coeftable(m)
@test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"]
@test termnames(m) == ("y", ["(Intercept)", "x1", "x2", "x1 & x2"])

## show with coeftable defined
io = IOBuffer()
Expand All @@ -171,6 +172,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
m2 = fit(DummyMod, f2, d)

@test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"]
@test termnames(m2) == ("y", ["(Intercept)", "x1p"])

## predict w/ new data missing levels
@test predict(m2, d[2:4, :]) == predict(m2)[2:4]
Expand Down Expand Up @@ -233,6 +235,13 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
m2 = fit(DummyModTwo, f, d)
# make sure show() still works when there is no coeftable method
show(io, m2)

# one final termnames check
# note that `1` is still a ConstantTerm and not yet InterceptTerm
# because apply_schema hasn't been called
@test termnames(@formula(y ~ 1 + log(x) * y + (1+x|g)))[2] ==
["1", "log(x)", "y", "log(x) & y", "(1 + x) | g"]

end

@testset "lrtest" begin
Expand Down

0 comments on commit a75b20b

Please sign in to comment.