diff --git a/src/statmodels.jl b/src/statmodels.jl
index 8151d0963..97924dae3 100644
--- a/src/statmodels.jl
+++ b/src/statmodels.jl
@@ -197,7 +197,12 @@ Coefficient of determination (R-squared).
 For a linear model, the R² is defined as ``ESS/TSS``, with ``ESS`` the explained sum of squares
 and ``TSS`` the total sum of squares.
 """
-r2(obj::StatisticalModel) = mss(obj) / deviance(obj)
+function r2(obj::StatisticalModel)
+    Base.depwarn(string("The default r² method for linear models is deprecated. ",
+                        "Packages should define their own methods."), :r2)
+    # Legacy fallback kept only for backward compatibility: mss(obj) over deviance(obj).
+    return mss(obj) / deviance(obj)
+end
 
 """
     r2(obj::StatisticalModel, variant::Symbol)
@@ -207,25 +212,27 @@ Pseudo-coefficient of determination (pseudo R-squared).
 
 For nonlinear models, one of several pseudo R² definitions must be chosen via `variant`.
 Supported variants are:
-- `:MacFadden` (a.k.a. likelihood ratio index), defined as ``1 - \\log L/\\log L0``.
-- `:CoxSnell`, defined as ``1 - (L0/L)^{2/n}``
-- `:Nagelkerke`, defined as ``(1 - (L0/L)^{2/n})/(1 - L0^{2/n})``, with ``n`` the number
-of observations (as returned by [`nobs`](@ref)).
+- `:McFadden` (a.k.a. likelihood ratio index), defined as ``1 - \\log (L)/\\log (L_0)``;
+- `:CoxSnell`, defined as ``1 - (L_0/L)^{2/n}``;
+- `:Nagelkerke`, defined as ``(1 - (L_0/L)^{2/n})/(1 - L_0^{2/n})``.
 
-In the above formulas, ``L`` is the likelihood of the model, ``L0`` that of the null model
-(the model including only the intercept). These two quantities are taken to be minus half
-`deviance` of the corresponding models.
+In the above formulas, ``L`` is the likelihood of the model,
+``L_0`` is the likelihood of the null model (the model with only an intercept),
+``n`` is the number of observations (as returned by [`nobs`](@ref)), ``y_i`` are the
+responses, ``\\hat{y}_i`` are the fitted values and ``\\bar{y}`` is the average response.
+
+Cox and Snell's R² should match the classical R² for linear models.
 """
-function r2(obj::StatisticalModel, variant::Symbol)
-    ll = -deviance(obj)/2
-    ll0 = -nulldeviance(obj)/2
 
+function r2(obj::StatisticalModel, variant::Symbol)
+    ll = loglikelihood(obj)
+    ll0 = nullloglikelihood(obj)
     if variant == :McFadden
         1 - ll/ll0
     elseif variant == :CoxSnell
-        1 - exp(2/nobs(obj) * (ll0 - ll))
+        1 - exp(2 * (ll0 - ll) / nobs(obj))
     elseif variant == :Nagelkerke
-        (1 - exp(2/nobs(obj) * (ll0 - ll)))/(1 - exp(2/nobs(obj) * ll0))
+        (1 - exp(2 * (ll0 - ll) / obs(obj))) / (1 - exp(2 * ll0 / nobs(obj)))
     else
         error("variant must be one of :McFadden, :CoxSnell or :Nagelkerke")
     end
@@ -252,17 +259,16 @@ adjr2(obj::StatisticalModel) = error("adjr2 is not defined for $(typeof(obj)).")
 Adjusted pseudo-coefficient of determination (adjusted pseudo R-squared).
 
 For nonlinear models, one of the several pseudo R² definitions must be chosen via `variant`.
-The only currently supported variant is `:MacFadden`, defined as ``1 - (\\log L - k)/\\log L0``.
+The only currently supported variant is `:McFadden`, defined as ``1 - (\\log (L) - k)/\\log (L0)``.
 In this formula, ``L`` is the likelihood of the model, ``L0`` that of the null model
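-(the model including only the intercept). These two quantities are taken to be minus half
-`deviance` of the corresponding models. ``k`` is the number of consumed degrees of freedom
+(the model including only the intercept); their logarithms are given by `loglikelihood`
+and `nullloglikelihood`. ``k`` is the number of consumed degrees of freedom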
 of the model (as returned by [`dof`](@ref)).
 """
 function adjr2(obj::StatisticalModel, variant::Symbol)
-    ll = -deviance(obj)/2
-    ll0 = -nulldeviance(obj)/2
+    ll = loglikelihood(obj)
+    ll0 = nullloglikelihood(obj)
     k = dof(obj)
-
     if variant == :McFadden
         1 - (ll - k)/ll0
     else