From 00830813ddd74adad48c5ed0913d50009041e6f2 Mon Sep 17 00:00:00 2001 From: Paul Bastide Date: Tue, 21 Mar 2023 18:51:51 +0100 Subject: [PATCH] Get rid of `Tableregressionmodel` (#194) - The PhyloNetworkLinearModel is no longer wrapped in a TableRegression Model - field name for evolutionary model: changed from 'model' to 'evomodel' in TraitSimulation and PhyloNetworkLinearModel. - deprecation notices for old access to model formula and model matrix --- src/PhyloNetworks.jl | 1 + src/deprecated.jl | 23 +++++ src/traits.jl | 168 ++++++++++++++-------------------- test/test_lm.jl | 43 +++++---- test/test_lm_tree.jl | 4 +- test/test_lm_withinspecies.jl | 12 +-- 6 files changed, 128 insertions(+), 123 deletions(-) diff --git a/src/PhyloNetworks.jl b/src/PhyloNetworks.jl index 3dcd9eff..c54ed1be 100644 --- a/src/PhyloNetworks.jl +++ b/src/PhyloNetworks.jl @@ -33,6 +33,7 @@ module PhyloNetworks import Base: show import GLM: ftest + import StatsModels: coefnames const DEBUGC = false # even more debug messages global CHECKNET = false # for debugging only diff --git a/src/deprecated.jl b/src/deprecated.jl index afa7a7e1..4c2691d5 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -11,3 +11,26 @@ @deprecate getMajorParentEdge getparentedge false @deprecate getMinorParentEdge getparentedgeminor false @deprecate getPartner getpartneredge false + +function Base.getproperty(mm::PhyloNetworkLinearModel, f::Symbol) + if f === :model + Base.depwarn("accessing the `model` field of PhyloNetworkLinearModel.jl models is deprecated, " * + "as they are no longer wrapped in a `TableRegressionModel` " * + "and can be used directly now", :getproperty) + return mm + elseif f === :mf + Base.depwarn("accessing the `mf` field of PhyloNetworkLinearModel.jl models is deprecated, " * + "as they are no longer wrapped in a `TableRegressionModel`." * + "Use `formula(m)` to access the model formula.", :getproperty) + form = formula(mm) + return ModelFrame{Nothing, typeof(mm)}(form, nothing, nothing, typeof(mm)) + elseif f === :mm + Base.depwarn("accessing the `mm` field of PhyloNetworkLinearModel.jl models is deprecated, " * + "as they are no longer wrapped in a `TableRegressionModel`." * + "Use `modelmatrix(m)` to access the model matrix.", :getproperty) + modmatr = modelmatrix(mm) + return ModelMatrix{typeof(modmatr)}(modmatr, Int[]) + else + return getfield(mm, f) + end +end \ No newline at end of file diff --git a/src/traits.jl b/src/traits.jl index e28c6c56..f25718e9 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -519,7 +519,7 @@ julia> dfr = innerjoin(dat, dfr_shift, on=:tipNames); # join data and regressors julia> using StatsModels # for statistical model formulas julia> fitBM = phylolm(@formula(trait ~ shift_1 + shift_8), dfr, net; reml=false) # actual fit -StatsModels.TableRegressionModel{PhyloNetworkLinearModel, Matrix{Float64}} +PhyloNetworkLinearModel Formula: trait ~ 1 + shift_1 + shift_8 @@ -654,7 +654,7 @@ julia> dfr = innerjoin(dat, dfr_hybrid, on=:tipNames); # join data and regressor julia> using StatsModels julia> fitBM = phylolm(@formula(trait ~ shift_6), dfr, net; reml=false) # actual fit -StatsModels.TableRegressionModel{PhyloNetworkLinearModel, Matrix{Float64}} +PhyloNetworkLinearModel Formula: trait ~ 1 + shift_6 @@ -1080,12 +1080,12 @@ The `TraitSimulation` object has fields: `M`, `params`, `model`. struct TraitSimulation M::MatrixTopologicalOrder params::ParamsProcess - model::AbstractString + evomodel::AbstractString end function Base.show(io::IO, obj::TraitSimulation) disp = "$(typeof(obj)):\n" - disp = disp * "Trait simulation results on a network with $(length(obj.M.tipNames)) tips, using a $(obj.model) model, with parameters:\n" + disp = disp * "Trait simulation results on a network with $(length(obj.M.tipNames)) tips, using a $(obj.evomodel) model, with parameters:\n" disp = disp * paramstable(obj.params) println(io, disp) end @@ -1561,7 +1561,8 @@ Phylogenetic linear model representation. ## Fields -`lm`, `V`, `Vy`, `RL`, `Y`, `X`, `logdetVy`, `reml`, `ind`, `nonmissing`, `model`, `model_within`. +`lm`, `V`, `Vy`, `RL`, `Y`, `X`, `logdetVy`, `reml`, `ind`, `nonmissing`, +`evomodel`, `model_within` and `formula`. The following syntax pattern can be used to get more information on a specific field: e.g. to find out about the `lm` field, do `?PhyloNetworkLinearModel.lm`. @@ -1638,17 +1639,22 @@ mutable struct PhyloNetworkLinearModel <: GLM.LinPredModel ind::Vector{Int} "nonmissing: vector indicating which tips have non-missing data" nonmissing::BitArray{1} - "model: the model used for the fit" - model::ContinuousTraitEM + "evomodel: the model used for the fit" + evomodel::ContinuousTraitEM # ContinuousTraitEM is abstract: not efficient. parametrize PhyloNetworkLinearModel? # but the types for Vy, Y and X are also abstract. "model_within: the model used for within-species variation (if needed)" model_within::Union{Nothing, WithinSpeciesCTM} + "formula: a StatsModels.FormulaTerm formula" + formula::Union{StatsModels.FormulaTerm,Nothing} end # default model_within=nothing PhyloNetworkLinearModel(lm, V,Vy,RL,Y,X,logdetVy, reml,ind,nonmissing, model) = - PhyloNetworkLinearModel(lm,V,Vy,RL,Y,X,logdetVy, reml,ind,nonmissing, model,nothing) + PhyloNetworkLinearModel(lm,V,Vy,RL,Y,X,logdetVy, reml,ind,nonmissing, model,nothing,nothing) +# default formula=nothing +PhyloNetworkLinearModel(lm,V,Vy,RL,Y,X,logdetVy,reml,ind,nonmissing,model,model_within) = +PhyloNetworkLinearModel(lm,V,Vy,RL,Y,X,logdetVy, reml,ind,nonmissing,model,model_within,nothing) #= ------ roadmap of phylolm methods -------------- @@ -2033,14 +2039,13 @@ end phylolm(f::StatsModels.FormulaTerm, fr::AbstractDataFrame, net::HybridNetwork; kwargs...) Fit a phylogenetic linear regression model to data. - -Return a [`StatsModels.TableRegressionModel`](https://juliastats.org/StatsModels.jl/stable/api/#StatsModels.TableRegressionModel) object. -This object has three fields: `model`, `mf`, `mm` (see [StatsModels](https://juliastats.github.io/StatsModels.jl/stable/)). -To access the fitted [`PhyloNetworkLinearModel`](@ref), do `object.model`. +Return an object of type [`PhyloNetworkLinearModel`](@ref). +It contains a linear model from the GLM package, in `object.lm`, of type +[GLM.LinearModel](https://juliastats.org/GLM.jl/stable/api/#GLM.LinearModel). ## Arguments -* `f`: formula to use for the regression. +* `f`: formula to use for the regression, see [StatsModels](https://juliastats.org/StatsModels.jl/stable/) * `fr`: DataFrame containing the response values, predictor values, species/tip labels for each observation/row. * `net`: phylogenetic network to use. Should have labelled tips. @@ -2078,11 +2083,8 @@ variance components if `model="BM"` and `withinspecies_var=true`. ## Methods applied to fitted models To access the response values, do `response(object)`. -To access the model matrix, do `object.mm.m`. -To access the model formula, do `show(object.mf.f)`. - -All of the StatsBase methods that can be applied to a `PhyloNetworkLinearModel` -can also be applied to a `StatsModels.TableRegressionModel`. +To access the model matrix, do `modelmatrix(object)`. +To access the model formula, do `formula(object)`. ## Within-species variation @@ -2136,7 +2138,7 @@ species standard deviation / sample sizes (if used) will throw an error. ## See also -[`PhyloNetworkLinearModel`](@ref), [`ancestralStateReconstruction`](@ref) +[`simulate`](@ref), [`ancestralStateReconstruction`](@ref), [`vcv`](@ref) ## Examples: Without within-species variation @@ -2152,7 +2154,7 @@ julia> using StatsModels # for stat model formulas julia> fitBM = phylolm(@formula(trait ~ 1), dat, phy; reml=false); julia> fitBM # Shows a summary -StatsModels.TableRegressionModel{PhyloNetworkLinearModel, Matrix{Float64}} +PhyloNetworkLinearModel Formula: trait ~ 1 @@ -2296,7 +2298,7 @@ julia> df = DataFrame( # individual-level observations julia> m1 = phylolm(@formula(trait3 ~ trait1), df, net; tipnames=:species, withinspecies_var=true) -StatsModels.TableRegressionModel{PhyloNetworkLinearModel, Matrix{Float64}} +PhyloNetworkLinearModel Formula: trait3 ~ 1 + trait1 @@ -2326,7 +2328,7 @@ julia> df_r = DataFrame( # species-level statistics (sample means, standard devi julia> m2 = phylolm(@formula(trait3 ~ trait1), df_r, net; tipnames=:species, withinspecies_var=true, y_mean_std=true) -StatsModels.TableRegressionModel{PhyloNetworkLinearModel, Matrix{Float64}} +PhyloNetworkLinearModel Formula: trait3 ~ 1 + trait1 @@ -2408,8 +2410,6 @@ function phylolm(f::StatsModels.FormulaTerm, mf = ModelFrame(f, sch, data, PhyloNetworkLinearModel) mm = StatsModels.ModelMatrix(mf) Y = StatsModels.response(mf) - # Y = convert(Vector{Float64}, StatsModels.response(mf)) - # Y, pred = StatsModels.modelcols(f, fr) if withinspecies_var && y_mean_std # find columns in data frame for: # of individuals from each species @@ -2441,11 +2441,12 @@ function phylolm(f::StatsModels.FormulaTerm, haskey(modeldic, model) || error("phylolm is not defined for model $model.") modelobj = modeldic[model] - StatsModels.TableRegressionModel( - phylolm(mm.m, Y, net, modelobj; reml=reml, nonmissing=nonmissing, ind=ind, - startingValue=startingValue, fixedValue=fixedValue, - withinspecies_var=withinspecies_var, counts=counts, ySD=ySD), - mf, mm) + + res = phylolm(mm.m, Y, net, modelobj; reml=reml, nonmissing=nonmissing, ind=ind, + startingValue=startingValue, fixedValue=fixedValue, + withinspecies_var=withinspecies_var, counts=counts, ySD=ySD) + res.formula = f + return res end ### Methods on type phyloNetworkRegression @@ -2453,6 +2454,9 @@ end ## Un-changed Quantities # Coefficients of the regression StatsBase.coef(m::PhyloNetworkLinearModel) = coef(m.lm) +StatsBase.coefnames(m::PhyloNetworkLinearModel) = + (m.formula === nothing ? ["x$i" for i in 1:length(coef(m))] : coefnames(formula(m).rhs)) +StatsModels.formula(obj::PhyloNetworkLinearModel) = obj.formula """ StatsBase.nobs(m::PhyloNetworkLinearModel) @@ -2533,17 +2537,17 @@ function StatsBase.coeftable(m::PhyloNetworkLinearModel; level::Real=0.95) n_coef = size(m.lm.pp.X, 2) # no. of predictors if n_coef == 0 return CoefTable([0], ["Fixed Value"], ["(Intercept)"]) - else - cc = coef(m) - se = stderror(m) - tt = cc ./ se - p = ccdf.(Ref(FDist(1, dof_residual(m))), abs2.(tt)) - ci = se*quantile(TDist(dof_residual(m)), (1-level)/2) - levstr = isinteger(level*100) ? string(Integer(level*100)) : string(level*100) - CoefTable(hcat(cc,se,tt,p,cc+ci,cc-ci), - ["Coef.","Std. Error","t","Pr(>|t|)","Lower $levstr%","Upper $levstr%"], - ["x$i" for i = 1:n_coef], 4, 3) end + cc = coef(m) + se = stderror(m) + tt = cc ./ se + p = ccdf.(Ref(FDist(1, dof_residual(m))), abs2.(tt)) + ci = se*quantile(TDist(dof_residual(m)), (1-level)/2) + levstr = isinteger(level*100) ? string(Integer(level*100)) : string(level*100) + cn = StatsModels.vectorize(coefnames(m)) + CoefTable(hcat(cc,se,tt,p,cc+ci,cc-ci), + ["Coef.","Std. Error","t","Pr(>|t|)","Lower $levstr%","Upper $levstr%"], + cn, 4, 3) end # degrees of freedom for residuals: at the species level, for coefficients of @@ -2554,7 +2558,7 @@ StatsBase.dof_residual(m::PhyloNetworkLinearModel) = nobs(m.lm) - length(coef(m # degrees of freedom consumed by the species-level model function StatsBase.dof(m::PhyloNetworkLinearModel) res = length(coef(m)) + 1 # +1: phylogenetic variance - if any(typeof(m.model) .== [PagelLambda, ScalingHybrid]) + if any(typeof(m.evomodel) .== [PagelLambda, ScalingHybrid]) res += 1 # lambda is one parameter end if !isnothing(m.model_within) @@ -2590,8 +2594,10 @@ end # Compute the residuals # (Rescaled by cholesky of variance between tips) StatsBase.residuals(m::PhyloNetworkLinearModel) = m.RL * residuals(m.lm) -# Tip data +# Tip data: m.Y is different from response(m.lm) +# and m.X is different from modelmatrix(m.lm) StatsBase.response(m::PhyloNetworkLinearModel) = m.Y +StatsBase.modelmatrix(m::PhyloNetworkLinearModel) = m.X # Predicted values at the tips # (rescaled by cholesky of tips variances) StatsBase.predict(m::PhyloNetworkLinearModel) = m.RL * predict(m.lm) @@ -2668,7 +2674,6 @@ function StatsBase.nulldeviance(m::PhyloNetworkLinearModel) return sum(ro.^2) end StatsModels.hasintercept(m::PhyloNetworkLinearModel) = any(i -> all(==(1), view(m.X , :, i)), 1:size(m.X, 2)) -StatsModels.hasintercept(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) = StatsModels.hasintercept(m.model) # Null Log likelihood (null model with only the intercept) # Same remark function StatsBase.nullloglikelihood(m::PhyloNetworkLinearModel) @@ -2716,17 +2721,12 @@ function sigma2_phylo(m::PhyloNetworkLinearModel) return σ² end -# adapt to TableRegressionModel because sigma2_phylo is a new function -sigma2_phylo(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) = - sigma2_phylo(m.model) - """ sigma2_within(m::PhyloNetworkLinearModel) Estimated within-species variance for a fitted object. """ sigma2_within(m::PhyloNetworkLinearModel) = (isnothing(m.model_within) ? nothing : m.model_within.wsp_var[1]) -sigma2_within(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) = sigma2_within(m.model) # ML estimate for ancestral state of the BM """ mu_phylo(m::PhyloNetworkLinearModel) @@ -2734,19 +2734,13 @@ sigma2_within(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} whe Estimated root value for a fitted object. """ function mu_phylo(m::PhyloNetworkLinearModel) - @warn """You fitted the data against a custom matrix, so I have no way - to know which column is your intercept (column of ones). - I am using the first coefficient for ancestral mean mu by convention, - but that might not be what you are looking for.""" - if size(m.lm.pp.X,2) == 0 - return 0 - else - return coef(m)[1] - end -end -# Need to be adapted manually to TableRegressionModel beacouse it's a new function -function mu_phylo(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) - if m.mf.f.rhs.terms[1] != StatsModels.InterceptTerm{true}() + if m.formula === nothing + @warn """You fitted the data against a custom matrix, so I have no way + to know which column is your intercept (column of ones). + I am using the first coefficient for ancestral mean mu by convention, + but that might not be what you are looking for.""" + size(m.lm.pp.X,2) == 0 && return 0 + elseif m.formula.rhs.terms[1] != StatsModels.InterceptTerm{true}() error("The fit was done without intercept, so I cannot estimate mu") end return coef(m)[1] @@ -2758,7 +2752,7 @@ end Value assigned to the lambda parameter, if appropriate. """ -lambda(m::PhyloNetworkLinearModel) = lambda(m.model) +lambda(m::PhyloNetworkLinearModel) = lambda(m.evomodel) lambda(m::Union{BM,PagelLambda,ScalingHybrid}) = m.lambda """ @@ -2767,7 +2761,7 @@ lambda(m::Union{BM,PagelLambda,ScalingHybrid}) = m.lambda Assign a new value to the lambda parameter. """ -lambda!(m::PhyloNetworkLinearModel, lambda_new) = lambda!(m.model, lambda_new) +lambda!(m::PhyloNetworkLinearModel, lambda_new) = lambda!(m.evomodel, lambda_new) lambda!(m::Union{BM,PagelLambda,ScalingHybrid}, lambda_new::Real) = (m.lambda = lambda_new) """ @@ -2776,14 +2770,13 @@ lambda!(m::Union{BM,PagelLambda,ScalingHybrid}, lambda_new::Real) = (m.lambda = Estimated lambda parameter for a fitted object. """ lambda_estim(m::PhyloNetworkLinearModel) = lambda(m) -lambda_estim(m::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) = lambda_estim(m.model) ### Print the results # Variance function paramstable(m::PhyloNetworkLinearModel) Sig = sigma2_phylo(m) res = "phylogenetic variance rate: " * @sprintf("%.6g", Sig) - if any(typeof(m.model) .== [PagelLambda, ScalingHybrid]) + if any(typeof(m.evomodel) .== [PagelLambda, ScalingHybrid]) Lamb = lambda_estim(m) res = res*"\nLambda: " * @sprintf("%.6g", Lamb) end @@ -2793,22 +2786,20 @@ function paramstable(m::PhyloNetworkLinearModel) end return(res) end -function Base.show(io::IO, obj::PhyloNetworkLinearModel) - println(io, "$(typeof(obj.model)):\n\nParameter Estimates, using ", (obj.reml ? "REML" : "ML"),":\n", - paramstable(obj), "\n\nCoefficients:\n", coeftable(obj)) -end # For DataFrameModel. see also Base.show in # https://github.com/JuliaStats/StatsModels.jl/blob/master/src/statsmodel.jl -function Base.show(io::IO, obj::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) +function Base.show(io::IO, obj::PhyloNetworkLinearModel) ct = coeftable(obj) println(io, "$(typeof(obj))") - print(io, "\nFormula: ") - println(io, string(obj.mf.f)) # formula + if !(obj.formula === nothing) + print(io, "\nFormula: ") + println(io, string(obj.formula)) # formula + end println(io) - println(io, "Model: $(evomodelname(obj.model.model))") + println(io, "Model: $(evomodelname(obj.evomodel))") println(io) - println(io,"Parameter Estimates, using ", (obj.model.reml ? "REML" : "ML"),":") - println(io, paramstable(obj.model)) + println(io,"Parameter Estimates, using ", (obj.reml ? "REML" : "ML"),":") + println(io, paramstable(obj)) println(io) println(io,"Coefficients:") show(io, ct) @@ -2982,7 +2973,7 @@ Models fitted with different criteria (ML and REML) are not nested. Models with different predictors (fixed effects) must be fitted with ML to be considered nested. """ -function isnested(m1m::PhyloNetworkLinearModel, m2m::PhyloNetworkLinearModel; atol::Real=0.0) +function StatsModels.isnested(m1m::PhyloNetworkLinearModel, m2m::PhyloNetworkLinearModel; atol::Real=0.0) if !(nobs(m1m) ≈ nobs(m2m)) @error "Models must have the same number of observations" return false @@ -3019,7 +3010,7 @@ function isnested(m1m::PhyloNetworkLinearModel, m2m::PhyloNetworkLinearModel; at return false end # nesting of phylogenetic variance models - return isnested(m1m.model, m2m.model) + return isnested(m1m.evomodel, m2m.evomodel) end isnested(::T,::T) where T <: ContinuousTraitEM = true @@ -3027,18 +3018,10 @@ isnested(::BM,::Union{PagelLambda,ScalingHybrid}) = true isnested(::Union{PagelLambda,ScalingHybrid}, ::BM) = false isnested(::ScalingHybrid,::PagelLambda) = false isnested(::PagelLambda,::ScalingHybrid) = false -StatsModels.isnested(m1::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}, - m2::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}; atol::Real=0.0) where T = - isnested(m1.model, m2.model; atol=atol) ## ANOVA using ftest from GLM - need version 0.8.1 -function GLM.ftest(objs::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}...) where T - objsModels = [obj.model for obj in objs] - return ftest(objsModels...) -end - function GLM.ftest(objs::PhyloNetworkLinearModel...) - if !all( isa(o.model,BM) && isnothing(o.model_within) for o in objs) + if !all( isa(o.evomodel,BM) && isnothing(o.model_within) for o in objs) throw(ArgumentError("""F test is only valid for the vanilla BM model. Use a likelihood ratio test instead with function `lrtest`.""")) end @@ -3057,11 +3040,6 @@ data, for models that have more and more effects. Returns a DataFrame object with the anova table. """ -function anova(objs::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T}...) where T - objsModels = [obj.model for obj in objs] - return(anova(objsModels...)) -end - function anova(objs::PhyloNetworkLinearModel...) anovaTable = Array{Any}(undef, length(objs)-1, 6) ## Compute binary statistics @@ -3225,8 +3203,7 @@ higher-level methods, for real data: - fits an intercept-only model, then calls #3 - by default without kwargs: model = BM w/o within-species variation -3. ancestralStateReconstruction(TableRegressionModel[, Matrix]) which calls: - ancestralStateReconstruction(PhyloNetworkLinearModel[, Matrix]) +3. ancestralStateReconstruction(PhyloNetworkLinearModel[, Matrix]) - takes a model already fitted - if no matrix given: the model must be intercept-only. An expanded intercept column is created with length = # nodes with *no* data @@ -3610,13 +3587,6 @@ function ancestralStateReconstruction(obj::PhyloNetworkLinearModel) X_n = ones((length(obj.V.nodeNumbersTopOrder) - sum(obj.nonmissing), 1)) ancestralStateReconstruction(obj, X_n) end -# For a TableRegressionModel -function ancestralStateReconstruction(obj::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T) - ancestralStateReconstruction(obj.model) -end -function ancestralStateReconstruction(obj::StatsModels.TableRegressionModel{PhyloNetworkLinearModel,T} where T, X_n::Matrix) - ancestralStateReconstruction(obj.model, X_n) -end """ ancestralStateReconstruction(fr::AbstractDataFrame, net::HybridNetwork; kwargs...) diff --git a/test/test_lm.jl b/test/test_lm.jl index c6cdca0a..b642ad87 100644 --- a/test/test_lm.jl +++ b/test/test_lm.jl @@ -72,9 +72,9 @@ fitbis = phylolm(@formula(trait ~ 1), dfr, net; reml=false) @test coef(phynetlm) ≈ coef(fitbis) @test vcov(phynetlm) ≈ vcov(fitbis) @test nobs(phynetlm) ≈ nobs(fitbis) -@test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis) -@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis) -@test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis) +@test residuals(phynetlm)[fitbis.ind] ≈ residuals(fitbis) +@test response(phynetlm)[fitbis.ind] ≈ response(fitbis) +@test predict(phynetlm)[fitbis.ind] ≈ predict(fitbis) @test dof_residual(phynetlm) ≈ dof_residual(fitbis) @test sigma2_phylo(phynetlm) ≈ sigma2_phylo(fitbis) @test stderror(phynetlm) ≈ stderror(fitbis) @@ -101,9 +101,9 @@ fitlam = phylolm(@formula(trait ~ 1), dfr, net, model = "lambda", fixedValue=1.0 @test coef(fitlam) ≈ coef(fitbis) @test vcov(fitlam) ≈ vcov(fitbis) @test nobs(fitlam) ≈ nobs(fitbis) -@test residuals(fitlam)[fitbis.model.ind] ≈ residuals(fitbis) -@test response(fitlam)[fitbis.model.ind] ≈ response(fitbis) -@test predict(fitlam)[fitbis.model.ind] ≈ predict(fitbis) +@test residuals(fitlam)[fitbis.ind] ≈ residuals(fitbis) +@test response(fitlam)[fitbis.ind] ≈ response(fitbis) +@test predict(fitlam)[fitbis.ind] ≈ predict(fitbis) @test dof_residual(fitlam) ≈ dof_residual(fitbis) @test sigma2_phylo(fitlam) ≈ sigma2_phylo(fitbis) @test stderror(fitlam) ≈ stderror(fitbis) @@ -125,6 +125,12 @@ fitSH = phylolm(@formula(trait ~ 1), dfr, net, model="scalingHybrid", fixedValue @test loglikelihood(fitlam) ≈ loglikelihood(fitSH) @test aic(fitlam) ≈ aic(fitSH) +@test modelmatrix(fitlam) == reshape(ones(4), (4,1)) +s = IOBuffer(); show(s, formula(fitlam)) +@test String(take!(s)) == "trait ~ 1" +PhyloNetworks.lambda!(fitlam, 0.5) +@test PhyloNetworks.lambda(fitlam) == 0.5 + ## Pagel's Lambda fitlam = (@test_logs (:info, r"^Maximum lambda value") match_mode=:any phylolm(@formula(trait ~ 1), dfr, net, model="lambda", reml=false)) @test lambda_estim(fitlam) ≈ 1.24875 @@ -254,8 +260,8 @@ dfr = DataFrame(trait = Y, reg = X, tipNames = ["Ag","Ak","E","M","Az","Ag2","As phynetlm = phylolm(@formula(trait ~ -1 + reg), dfr, net; reml=false) # Naive version (GLS): most of it hard-coded, but code shown below ntaxa = length(Y) -X = phynetlm.model.X -# Vy = phynetlm.model.Vy; Vyinv = inv(Vy); XtVyinv = X' * Vyinv; logdetVy = logdet(Vy) +X = phynetlm.X +# Vy = phynetlm.Vy; Vyinv = inv(Vy); XtVyinv = X' * Vyinv; logdetVy = logdet(Vy) betahat = [1.073805579608655] # inv(XtVyinv * X) * XtVyinv * Y fittedValues = X * betahat resids = Y - fittedValues @@ -380,6 +386,11 @@ phynetlm = phylolm(@formula(trait ~ pred), dfr, net; reml=false) @test aicc(phynetlm) ≈ aicc(fit_mat) @test bic(phynetlm) ≈ bic(fit_mat) +# Deprecated methods +@test phynetlm.model === phynetlm +@test phynetlm.mf.f == formula(phynetlm) +@test phynetlm.mm.m == modelmatrix(phynetlm) + # unordered data dfr = dfr[[2,6,10,5,12,7,4,11,1,8,3,9], :] fitbis = phylolm(@formula(trait ~ pred), dfr, net; reml=false) @@ -387,9 +398,9 @@ fitbis = phylolm(@formula(trait ~ pred), dfr, net; reml=false) @test coef(phynetlm) ≈ coef(fitbis) @test vcov(phynetlm) ≈ vcov(fitbis) @test nobs(phynetlm) ≈ nobs(fitbis) -@test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis) -@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis) -@test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis) +@test residuals(phynetlm)[fitbis.ind] ≈ residuals(fitbis) +@test response(phynetlm)[fitbis.ind] ≈ response(fitbis) +@test predict(phynetlm)[fitbis.ind] ≈ predict(fitbis) @test dof_residual(phynetlm) ≈ dof_residual(fitbis) @test sigma2_phylo(phynetlm) ≈ sigma2_phylo(fitbis) @test stderror(phynetlm) ≈ stderror(fitbis) @@ -542,8 +553,8 @@ phynetlm = phylolm(@formula(trait~1), dfr2, net) blup2 = (@test_logs (:warn, r"^These prediction intervals show uncertainty in ancestral values") ancestralStateReconstruction(phynetlm)) @test expectations(blup)[1:length(blup.NodeNumbers),:condExpectation] ≈ expectations(blup2)[1:length(blup.NodeNumbers),:condExpectation] -@test blup.traits_tips[phynetlm.model.ind] ≈ blup2.traits_tips -@test blup.TipNumbers[phynetlm.model.ind] ≈ blup2.TipNumbers +@test blup.traits_tips[phynetlm.ind] ≈ blup2.traits_tips +@test blup.TipNumbers[phynetlm.ind] ≈ blup2.TipNumbers @test predint(blup)[1:length(blup.NodeNumbers), :] ≈ predint(blup2)[1:length(blup.NodeNumbers), :] # With unknown tips @@ -702,9 +713,9 @@ fitbis = phylolm(@formula(trait ~ -1), dfr, net) #@test coef(phynetlm) ≈ coef(fitbis) #@test vcov(phynetlm) ≈ vcov(fitbis) @test nobs(phynetlm) ≈ nobs(fitbis) -@test residuals(phynetlm)[fitbis.model.ind] ≈ residuals(fitbis) -@test response(phynetlm)[fitbis.model.ind] ≈ response(fitbis) -@test predict(phynetlm)[fitbis.model.ind] ≈ predict(fitbis) +@test residuals(phynetlm)[fitbis.ind] ≈ residuals(fitbis) +@test response(phynetlm)[fitbis.ind] ≈ response(fitbis) +@test predict(phynetlm)[fitbis.ind] ≈ predict(fitbis) @test dof_residual(phynetlm) ≈ dof_residual(fitbis) @test sigma2_phylo(phynetlm) ≈ sigma2_phylo(fitbis) #@test stderror(phynetlm) ≈ stderror(fitbis) diff --git a/test/test_lm_tree.jl b/test/test_lm_tree.jl index bb33bf9a..1ea1d54c 100644 --- a/test/test_lm_tree.jl +++ b/test/test_lm_tree.jl @@ -305,11 +305,11 @@ vcovR = [ 0.1165148753 -0.0431446679 -0.0305707092 0.0000000000 @test confint(fitBM)[:,2] ≈ [5.5506215797 0.4514799811 -0.0885456333 0.6852268574]' atol=1e-10 predictR = [4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 4.2109125624, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.7921505131, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 3.8080854332, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547, 4.8773804547] tmp = predict(fitBM) -tmp2 = predictR[fitBM.model.ind] +tmp2 = predictR[fitBM.ind] # below: print for when the test was broken #@show tmp[1:6], tmp[190:end] #@show tmp2[1:6],tmp2[190:end] # the last 5 values are different -@test isapprox(predict(fitBM), predictR[fitBM.model.ind], atol=1e-8) +@test isapprox(predict(fitBM), predictR[fitBM.ind], atol=1e-8) # ## R code to get those results # library(geiger) diff --git a/test/test_lm_withinspecies.jl b/test/test_lm_withinspecies.jl index d48fc739..3a4aff58 100644 --- a/test/test_lm_withinspecies.jl +++ b/test/test_lm_withinspecies.jl @@ -88,7 +88,7 @@ m1 = phylolm(@formula(trait3 ~ trait1), df, starnet; reml=true, tipnames=:species, withinspecies_var=true) m2 = phylolm(@formula(trait3 ~ trait1), df_r, starnet; reml=true, tipnames=:species, withinspecies_var=true, y_mean_std=true) -@test m1.model.reml && m2.model.reml +@test m1.reml && m2.reml @test coef(m1) ≈ [8.457020,2.296978] rtol=1e-5 # fixef(mR) @test coef(m2) ≈ [8.457020,2.296978] rtol=1e-5 @test sigma2_phylo(m1) ≈ 2.1216068 rtol=1e-5 # print(mR, digits=7, ranef.comp="Var") @@ -101,7 +101,7 @@ m2 = phylolm(@formula(trait3 ~ trait1), df_r, starnet; reml=true, @test vcov(m2) ≈ [3.111307 -1.219935; -1.219935 0.5913808] rtol=1e-5 m3 = phylolm(@formula(trait3 ~ trait1), df_r, starnet; reml=false, tipnames=:species, withinspecies_var=true, y_mean_std=true) -@test !m3.model.reml +@test !m3.reml @test coef(m3) ≈ [8.439909,2.318488] rtol=1e-5 @test sigma2_phylo(m3) ≈ 0.9470427 rtol=1e-5 @test sigma2_within(m3) ≈ 0.5299540 rtol=1e-5 @@ -168,7 +168,7 @@ coef(m2) + qt(p=0.975,df=5-3)*sqrt(diag(vcov(m2))) # upper limit 95%-CI: c(1.403 =# m1 = phylolm(@formula(y~x1+x2),df,net; tipnames=:species, reml=true) m2 = phylolm(@formula(y~x1+x2),df,net; tipnames=:species, reml=false) -@test m1.model.reml +@test m1.reml @test coef(m1) ≈ [0.6469652,2.0420889,2.8285257] rtol=1e-5 @test sigma2_phylo(m1) ≈ 0.0438973 rtol=1e-4 @test isnothing(sigma2_within(m1)) @@ -178,7 +178,7 @@ m2 = phylolm(@formula(y~x1+x2),df,net; tipnames=:species, reml=false) @test coeftable(m1).cols[coeftable(m1).pvalcol] ≈ [0.066635016,0.009223303,0.001666460] rtol=1e-4 @test coeftable(m1).cols[findall(coeftable(m1).colnms .== "Lower 95%")[1]] ≈ [-0.1099703,1.1923712,2.3310897] rtol=1e-5 @test coeftable(m1).cols[findall(coeftable(m1).colnms .== "Upper 95%")[1]] ≈ [1.403901,2.891807,3.325962] rtol=1e-5 -@test !m2.model.reml +@test !m2.reml @test coef(m2) ≈ [0.6469652,2.0420889,2.8285257] rtol=1e-5 @test sigma2_phylo(m2) ≈ 0.01755892 rtol=1e-4 @test isnothing(sigma2_within(m2)) @@ -573,7 +573,7 @@ allowmissing!(df_r,[:trait3]); df_r[2,:trait3] = missing # to check imputation @testset "ancestral state prediction, intercept only" begin m1 = phylolm(@formula(trait3 ~ 1), df_r, net; tipnames=:species, withinspecies_var=true, y_mean_std=true) ar1 = (@test_logs (:warn, r"^T") ancestralStateReconstruction(m1)) -# ar.NodeNumbers[8] == 2 (looking at node #2), m1.model.V.tipNames[indexin([2],m1.model.V.tipNumbers)[1]] == "C" (looking at tip "C") +# ar.NodeNumbers[8] == 2 (looking at node #2), m1.V.tipNames[indexin([2],m1.V.tipNumbers)[1]] == "C" (looking at tip "C") @test ar1.traits_nodes[8] ≈ 18.74416393519304 rtol=1e-5 # masked sampled C_bar was 17.0686 @test predint(ar1)[8,:] ≈ [15.24005506417728,22.2482728062088] rtol=1e-5 # on dataframe with model passed as keyword args. must be individual data. @@ -592,7 +592,7 @@ end @testset "ancestral state prediction, more than intercept" begin m3 = phylolm(@formula(trait3 ~ trait1 + trait2), df[[1,6,11,17,16,18,8,5,9,3,12,7,13,10,2,14,4,15],:], net; tipnames=:species, withinspecies_var=true) -X_n = [m3.model.X;m3.model.X[1:3,:]] # 8x3 Array +X_n = [m3.X;m3.X[1:3,:]] # 8x3 Array ar3 = (@test_logs (:warn, r"^T") ancestralStateReconstruction(m3, X_n)) m4 = phylolm(@formula(trait3 ~ trait1 + trait2), df_r[[1,4,6,3,2,5],:], net; tipnames=:species, withinspecies_var=true, y_mean_std=true) ar4 = (@test_logs (:warn, r"^T") ancestralStateReconstruction(m4, X_n))