From a75b20bd3cd56c1b1150d69e0fc517d03972c761 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 1 Sep 2023 21:28:16 -0500 Subject: [PATCH] termnames --- Project.toml | 2 +- src/StatsModels.jl | 1 + src/contrasts.jl | 12 ++++++------ src/statsmodel.jl | 41 ++++++++++++++++++++++++++++++++++++++++- test/statsmodel.jl | 9 +++++++++ 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 1bf9eb39..7376d625 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "StatsModels" uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -version = "0.7.2" +version = "0.7.3" [deps] DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" diff --git a/src/StatsModels.jl b/src/StatsModels.jl index 0e119901..79c2a93f 100644 --- a/src/StatsModels.jl +++ b/src/StatsModels.jl @@ -36,6 +36,7 @@ export coefnames, setcontrasts!, formula, + termnames, AbstractTerm, ConstantTerm, diff --git a/src/contrasts.jl b/src/contrasts.jl index f4f03af8..fa4ae517 100644 --- a/src/contrasts.jl +++ b/src/contrasts.jl @@ -87,7 +87,7 @@ mutable struct MyCoding <: AbstractContrasts end contrasts_matrix(C::MyCoding, baseind, n) = ... -termnames(C::MyCoding, levels, baseind) = ... +_termnames(C::MyCoding, levels, baseind) = ... 
``` # References @@ -198,7 +198,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst "$c_levels.")) end - tnames = termnames(contrasts, c_levels, baseind) + tnames = _termnames(contrasts, c_levels, baseind) mat = contrasts_matrix(contrasts, baseind, n) @@ -224,7 +224,7 @@ function ContrastsMatrix(c::ContrastsMatrix, levels::AbstractVector) return c end -function termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer) +function _termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer) not_base = [1:(baseind-1); (baseind+1):length(levels)] levels[not_base] end @@ -233,7 +233,7 @@ Base.getindex(contrasts::ContrastsMatrix, rowinds, colinds) = getindex(contrasts.matrix, getindex.(Ref(contrasts.invindex), rowinds), colinds) # Making a contrast type T only requires that there be a method for -# contrasts_matrix(T, baseind, n) and optionally termnames(T, levels, baseind) +# contrasts_matrix(T, baseind, n) and optionally _termnames(T, levels, baseind) # The rest is boilerplate. 
for contrastType in [:DummyCoding, :EffectsCoding, :HelmertCoding] @eval begin @@ -462,7 +462,7 @@ function contrasts_matrix(C::SeqDiffCoding, _, n) end # TODO: consider customizing term names: -# termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) = +# _termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) = # ["$(levels[i])-$(levels[i-1])" for i in 2:length(levels)] """ @@ -591,7 +591,7 @@ function contrasts_matrix(C::HypothesisCoding, baseind, n) C.contrasts end -termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) = +_termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) = something(C.labels, levels[1:length(levels) .!= baseind]) DataAPI.levels(c::HypothesisCoding) = c.levels diff --git a/src/statsmodel.jl b/src/statsmodel.jl index 0bb67c7c..d4fa65d4 100644 --- a/src/statsmodel.jl +++ b/src/statsmodel.jl @@ -106,12 +106,51 @@ function formula end formula(m::TableStatisticalModel) = m.mf.f formula(m::TableRegressionModel) = m.mf.f +""" + termnames(model::StatisticalModel) + termnames(term::AbstractTerm) + +Return the names of the terms associated with a model. + +For models with only continuous predictors, this is the same as +`(responsename(model), coefnames(model))`. + +For models with categorical predictors, the returned names reflect +the categorical predictor and not the coefficients resulting from +the choice of contrast coding. + +# Examples +```jldoctest +julia> termnames(@formula(y ~ 1 + log(x) * log(y) + (1+x|g))) +("y", ["1", "log(x)", "log(y)", "log(x) & log(y)", "(1 + x) | g"]) +``` +""" +termnames(model::StatisticalModel) = termnames(formula(model)) + +""" + termnames(term::AbstractTerm) + +Return the name(s) of the term(s) represented by `term`. Return value is either a +`String` or an iterable of `String`s. +""" +termnames(t::FormulaTerm) = (termnames(t.lhs), termnames(t.rhs)) +termnames(::InterceptTerm{H}) where {H} = H ? 
"(Intercept)" : nothing +termnames(t::ContinuousTerm) = string(t.sym) +termnames(t::CategoricalTerm) = string(t.sym) +termnames(t::Term) = string(t.sym) +termnames(t::ConstantTerm) = string(t.n) +termnames(t::FunctionTerm) = string(t.exorig) +termnames(ts::TupleTerm) = reduce(vcat, termnames.(ts)) +termnames(t::MatrixTerm) = mapreduce(termnames, vcat, t.terms) +termnames(t::InteractionTerm) = + kron_insideout((args...) -> join(args, " & "), vectorize.(termnames.(t.terms))...) + @doc """ fit(Mod::Type{<:StatisticalModel}, f::FormulaTerm, data, args...; contrasts::Dict{Symbol}, kwargs...) Convert tabular data into a numeric response vector and predictor matrix using -the formula `f`, and then `fit` the specified model type, wrapping the result in +the formula `f`, and then `fit` the specified model type, wrapping Stthe result in a [`TableRegressionModel`](@ref) or [`TableStatisticalModel`](@ref) (as appropriate). diff --git a/test/statsmodel.jl b/test/statsmodel.jl index 7e81b6f9..639109af 100644 --- a/test/statsmodel.jl +++ b/test/statsmodel.jl @@ -161,6 +161,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) ## test copying of names from Terms to CoefTable ct = coeftable(m) @test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"] + @test termnames(m) == ("y", ["(Intercept)", "x1", "x2", "x1 & x2"]) ## show with coeftable defined io = IOBuffer() @@ -171,6 +172,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) m2 = fit(DummyMod, f2, d) @test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"] + @test termnames(m2) == ("y", ["(Intercept)", "x1p"]) ## predict w/ new data missing levels @test predict(m2, d[2:4, :]) == predict(m2)[2:4] @@ -233,6 +235,13 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) m2 = fit(DummyModTwo, f, d) # make sure show() still works when there is no coeftable method show(io, m2) + + # one final termnames check + # note that `1` is still a ConstantTerm and not yet InterceptTerm + # because 
apply_schema hasn't been called + @test termnames(@formula(y ~ 1 + log(x) * y + (1+x|g)))[2] == + ["1", "log(x)", "y", "log(x) & y", "(1 + x) | g"] + end @testset "lrtest" begin