From a75b20bd3cd56c1b1150d69e0fc517d03972c761 Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Fri, 1 Sep 2023 21:28:16 -0500
Subject: [PATCH] termnames

---
 Project.toml       |  2 +-
 src/StatsModels.jl |  1 +
 src/contrasts.jl   | 12 ++++++------
 src/statsmodel.jl  | 41 ++++++++++++++++++++++++++++++++++++++++-
 test/statsmodel.jl |  9 +++++++++
 5 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/Project.toml b/Project.toml
index 1bf9eb39..7376d625 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "StatsModels"
 uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
-version = "0.7.2"
+version = "0.7.3"
 
 [deps]
 DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
diff --git a/src/StatsModels.jl b/src/StatsModels.jl
index 0e119901..79c2a93f 100644
--- a/src/StatsModels.jl
+++ b/src/StatsModels.jl
@@ -36,6 +36,7 @@ export
     coefnames,
     setcontrasts!,
     formula,
+    termnames,
 
     AbstractTerm,
     ConstantTerm,
diff --git a/src/contrasts.jl b/src/contrasts.jl
index f4f03af8..fa4ae517 100644
--- a/src/contrasts.jl
+++ b/src/contrasts.jl
@@ -87,7 +87,7 @@ mutable struct MyCoding <: AbstractContrasts
 end
 
 contrasts_matrix(C::MyCoding, baseind, n) = ...
-termnames(C::MyCoding, levels, baseind) = ...
+_termnames(C::MyCoding, levels, baseind) = ...
 ```
 
 # References
@@ -198,7 +198,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
                             "$c_levels."))
     end
 
-    tnames = termnames(contrasts, c_levels, baseind)
+    tnames = _termnames(contrasts, c_levels, baseind)
 
     mat = contrasts_matrix(contrasts, baseind, n)
 
@@ -224,7 +224,7 @@ function ContrastsMatrix(c::ContrastsMatrix, levels::AbstractVector)
     return c
 end
 
-function termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer)
+function _termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer)
     not_base = [1:(baseind-1); (baseind+1):length(levels)]
     levels[not_base]
 end
@@ -233,7 +233,7 @@ Base.getindex(contrasts::ContrastsMatrix, rowinds, colinds) =
     getindex(contrasts.matrix, getindex.(Ref(contrasts.invindex), rowinds), colinds)
 
 # Making a contrast type T only requires that there be a method for
-# contrasts_matrix(T,  baseind, n) and optionally termnames(T, levels, baseind)
+# contrasts_matrix(T,  baseind, n) and optionally _termnames(T, levels, baseind)
 # The rest is boilerplate.
 for contrastType in [:DummyCoding, :EffectsCoding, :HelmertCoding]
     @eval begin
@@ -462,7 +462,7 @@ function contrasts_matrix(C::SeqDiffCoding, _, n)
 end
 
 # TODO: consider customizing term names:
-# termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
+# _termnames(C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
 #     ["$(levels[i])-$(levels[i-1])" for i in 2:length(levels)]
 
 """
@@ -591,7 +591,7 @@ function contrasts_matrix(C::HypothesisCoding, baseind, n)
     C.contrasts
 end
 
-termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) =
+_termnames(C::HypothesisCoding, levels::AbstractVector, baseind::Int) =
     something(C.labels, levels[1:length(levels) .!= baseind])
 
 DataAPI.levels(c::HypothesisCoding) = c.levels
diff --git a/src/statsmodel.jl b/src/statsmodel.jl
index 0bb67c7c..d4fa65d4 100644
--- a/src/statsmodel.jl
+++ b/src/statsmodel.jl
@@ -106,12 +106,51 @@ function formula end
 formula(m::TableStatisticalModel) = m.mf.f
 formula(m::TableRegressionModel) = m.mf.f
 
+"""
+    termnames(model::StatisticalModel)
+    termnames(term::AbstractTerm)
+
+Return the names of the terms used in a model (or contained in a term).
+
+For models with only continuous predictors, this is the same as
+`(responsename(model), coefnames(model))`.
+
+For models with categorical predictors, the returned names reflect
+the categorical predictor and not the coefficients resulting from
+the choice of contrast coding.
+
+# Examples
+```jldoctest
+julia> termnames(@formula(y ~ 1 + log(x) * log(y) + (1+x|g)))
+("y", ["1", "log(x)", "log(y)", "log(x) & log(y)", "(1 + x) | g"])
+```
+"""
+termnames(model::StatisticalModel) = termnames(formula(model))
+
+"""
+    termnames(term::AbstractTerm)
+
+Return the name(s) of `term` as written in the formula, not the generated column names.
+The result is a `String`, a vector of `String`s, or a `(lhs, rhs)` tuple for a formula.
+"""
+termnames(t::FormulaTerm) = (termnames(t.lhs), termnames(t.rhs))
+# a suppressed intercept contributes no name: an empty vector vanishes under `vcat`
+termnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : String[]
+termnames(t::Union{Term,ContinuousTerm,CategoricalTerm}) = string(t.sym)
+termnames(t::ConstantTerm) = string(t.n)
+termnames(t::FunctionTerm) = string(t.exorig)
+# `init` guarantees a `Vector{String}` even for empty or single-element collections
+termnames(ts::TupleTerm) = mapreduce(termnames, vcat, ts; init=String[])
+termnames(t::MatrixTerm) = mapreduce(termnames, vcat, t.terms; init=String[])
+termnames(t::InteractionTerm) =
+    kron_insideout((args...) -> join(args, " & "), vectorize.(termnames.(t.terms))...)
+
 @doc """
     fit(Mod::Type{<:StatisticalModel}, f::FormulaTerm, data, args...;
         contrasts::Dict{Symbol}, kwargs...)
 
 Convert tabular data into a numeric response vector and predictor matrix using
-the formula `f`, and then `fit` the specified model type, wrapping the result in
+the formula `f`, and then `fit` the specified model type, wrapping the result in
 a [`TableRegressionModel`](@ref) or [`TableStatisticalModel`](@ref) (as
 appropriate).
 
diff --git a/test/statsmodel.jl b/test/statsmodel.jl
index 7e81b6f9..639109af 100644
--- a/test/statsmodel.jl
+++ b/test/statsmodel.jl
@@ -161,6 +161,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     ## test copying of names from Terms to CoefTable
     ct = coeftable(m)
     @test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"]
+    @test termnames(m) == ("y", ["(Intercept)", "x1", "x2", "x1 & x2"])
 
     ## show with coeftable defined
     io = IOBuffer()
@@ -171,6 +172,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     m2 = fit(DummyMod, f2, d)
 
     @test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"]
+    @test termnames(m2) == ("y", ["(Intercept)", "x1p"])
 
     ## predict w/ new data missing levels
     @test predict(m2, d[2:4, :]) == predict(m2)[2:4]
@@ -233,6 +235,13 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     m2 = fit(DummyModTwo, f, d)
     # make sure show() still works when there is no coeftable method
     show(io, m2)
+
+    # one final termnames check
+    # note that `1` is still a ConstantTerm and not yet InterceptTerm
+    # because apply_schema hasn't been called
+    @test termnames(@formula(y ~ 1 + log(x) * y + (1+x|g))) ==
+          ("y", ["1", "log(x)", "y", "log(x) & y", "(1 + x) | g"])
+
 end
 
 @testset "lrtest" begin