diff --git a/src/statmodels.jl b/src/statmodels.jl index 8151d0963..97924dae3 100644 --- a/src/statmodels.jl +++ b/src/statmodels.jl @@ -197,7 +197,12 @@ Coefficient of determination (R-squared). For a linear model, the R² is defined as ``ESS/TSS``, with ``ESS`` the explained sum of squares and ``TSS`` the total sum of squares. """ -r2(obj::StatisticalModel) = mss(obj) / deviance(obj) +function r2(obj::StatisticalModel) + Base.depwarn("The default r² method for linear models is deprecated. " * + "Packages should define their own methods.", :r2) + + mss(obj) / deviance(obj) +end """ r2(obj::StatisticalModel, variant::Symbol) @@ -207,25 +212,27 @@ Pseudo-coefficient of determination (pseudo R-squared). For nonlinear models, one of several pseudo R² definitions must be chosen via `variant`. Supported variants are: -- `:MacFadden` (a.k.a. likelihood ratio index), defined as ``1 - \\log L/\\log L0``. -- `:CoxSnell`, defined as ``1 - (L0/L)^{2/n}`` -- `:Nagelkerke`, defined as ``(1 - (L0/L)^{2/n})/(1 - L0^{2/n})``, with ``n`` the number -of observations (as returned by [`nobs`](@ref)). +- `:MacFadden` (a.k.a. likelihood ratio index), defined as ``1 - \\log (L)/\\log (L_0)``; +- `:CoxSnell`, defined as ``1 - (L_0/L)^{2/n}``; +- `:Nagelkerke`, defined as ``(1 - (L_0/L)^{2/n})/(1 - L_0^{2/n})``. -In the above formulas, ``L`` is the likelihood of the model, ``L0`` that of the null model -(the model including only the intercept). These two quantities are taken to be minus half -`deviance` of the corresponding models. +In the above formulas, ``L`` is the likelihood of the model, +``L_0`` is the likelihood of the null model (the model with only an intercept), +``n`` is the number of observations, ``y_i`` are the responses, +``\\hat{y}_i`` are fitted values and ``\\bar{y}`` is the average response. + +Cox and Snell's R² should match the classical R² for linear models. """ -function r2(obj::StatisticalModel, variant::Symbol) - ll = -deviance(obj)/2 - ll0 = -nulldeviance(obj)/2 +function r2(obj::StatisticalModel, variant::Symbol) + ll = loglikelihood(obj) + ll0 = nullloglikelihood(obj) if variant == :McFadden 1 - ll/ll0 elseif variant == :CoxSnell - 1 - exp(2/nobs(obj) * (ll0 - ll)) + 1 - exp(2 * (ll0 - ll) / nobs(obj)) elseif variant == :Nagelkerke - (1 - exp(2/nobs(obj) * (ll0 - ll)))/(1 - exp(2/nobs(obj) * ll0)) + (1 - exp(2 * (ll0 - ll) / obs(obj))) / (1 - exp(2 * ll0 / nobs(obj))) else error("variant must be one of :McFadden, :CoxSnell or :Nagelkerke") end @@ -252,17 +259,16 @@ adjr2(obj::StatisticalModel) = error("adjr2 is not defined for $(typeof(obj)).") Adjusted pseudo-coefficient of determination (adjusted pseudo R-squared). For nonlinear models, one of the several pseudo R² definitions must be chosen via `variant`. -The only currently supported variant is `:MacFadden`, defined as ``1 - (\\log L - k)/\\log L0``. +The only currently supported variant is `:MacFadden`, defined as ``1 - (\\log (L) - k)/\\log (L0)``. In this formula, ``L`` is the likelihood of the model, ``L0`` that of the null model (the model including only the intercept). These two quantities are taken to be minus half `deviance` of the corresponding models. ``k`` is the number of consumed degrees of freedom of the model (as returned by [`dof`](@ref)). """ function adjr2(obj::StatisticalModel, variant::Symbol) - ll = -deviance(obj)/2 - ll0 = -nulldeviance(obj)/2 + ll = loglikelihood(obj) + ll0 = nullloglikelihood(obj) k = dof(obj) - if variant == :McFadden 1 - (ll - k)/ll0 else