diff --git a/DESCRIPTION b/DESCRIPTION index c25f63a1..f6ef82e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,58 +1,58 @@ -Package: sjstats -Type: Package -Encoding: UTF-8 -Title: Collection of Convenient Functions for Common Statistical Computations -Version: 0.9.0.9000 -Date: 2017-03-05 -Author: Daniel Lüdecke -Maintainer: Daniel Lüdecke -Description: Collection of convenient functions for common statistical computations, - which are not directly provided by R's base or stats packages. - This package aims at providing, first, shortcuts for statistical - measures, which otherwise could only be calculated with additional - effort (like standard errors or root mean squared errors). Second, - these shortcut functions are generic (if appropriate), and can be - applied not only to vectors, but also to other objects as well - (e.g., the Coefficient of Variation can be computed for vectors, - linear models, or linear mixed models; the r2()-function returns - the r-squared value for 'lm', 'glm', 'merMod' or 'lme' objects). - The focus of most functions lies on summary statistics or fit - measures for regression models, including generalized linear - models and mixed effects models. However, some of the functions - also deal with other statistical measures, like Cronbach's Alpha, - Cramer's V, Phi etc. -License: GPL-3 -Depends: - R (>= 3.2), - stats, - utils -Imports: - broom, - coin, - dplyr (>= 0.5.0), - lme4 (>= 1.1-12), - lmtest (>= 0.9-34), - MASS, - Matrix, - modelr, - nlme, - purrr (>= 0.2.2), - sandwich (>= 2.3-4), - sjmisc (>= 2.3.1), - tidyr (>= 0.6.1), - tibble (>= 1.3.0) -Suggests: - AER, - arm, - car, - ggplot2, - graphics, - Hmisc, - lmerTest, - pbkrtest (>= 0.4-7), - pROC, - pwr, - survey -URL: https://github.com/strengejacke/sjstats -BugReports: https://github.com/strengejacke/sjstats/issues -RoxygenNote: 6.0.1 +Package: sjstats +Type: Package +Encoding: UTF-8 +Title: Collection of Convenient Functions for Common Statistical Computations +Version: 0.10.0 +Date: 2017-04-10 +Author: Daniel Lüdecke +Maintainer: Daniel Lüdecke +Description: Collection of convenient functions for common statistical computations, + which are not directly provided by R's base or stats packages. + This package aims at providing, first, shortcuts for statistical + measures, which otherwise could only be calculated with additional + effort (like standard errors or root mean squared errors). Second, + these shortcut functions are generic (if appropriate), and can be + applied not only to vectors, but also to other objects as well + (e.g., the Coefficient of Variation can be computed for vectors, + linear models, or linear mixed models; the r2()-function returns + the r-squared value for 'lm', 'glm', 'merMod' or 'lme' objects). + The focus of most functions lies on summary statistics or fit + measures for regression models, including generalized linear + models and mixed effects models. However, some of the functions + also deal with other statistical measures, like Cronbach's Alpha, + Cramer's V, Phi etc. 
+License: GPL-3 +Depends: + R (>= 3.2), + stats, + utils +Imports: + broom, + coin, + dplyr (>= 0.5.0), + lme4 (>= 1.1-12), + lmtest (>= 0.9-34), + MASS, + Matrix, + modelr, + nlme, + purrr (>= 0.2.2), + sandwich (>= 2.3-4), + sjmisc (>= 2.4.0), + tidyr (>= 0.6.1), + tibble (>= 1.3.0) +Suggests: + AER, + arm, + car, + ggplot2, + graphics, + Hmisc, + lmerTest, + pbkrtest (>= 0.4-7), + pROC, + pwr, + survey +URL: https://github.com/strengejacke/sjstats +BugReports: https://github.com/strengejacke/sjstats/issues +RoxygenNote: 6.0.1 diff --git a/NEWS b/NEWS index 7ab45dc0..ef63457d 100644 --- a/NEWS +++ b/NEWS @@ -1,124 +1,124 @@ -Version 0.9.0.9000 ------------------------------------------------------------------------------- -New functions: -* `cv_error()` and `cv_compare()` to compute the root mean squared error for test and training data from cross-validation. -* `props()` to calculate proportions in a vector, supporting multiple logical statements. -* `or_to_rr()` to convert odds ratio estimates into risk ratio estimates. -* `mn()`, `md()` and `sm()` to calculate mean, median or sum of a vector, but using `na.rm = TRUE` as default. -* S3-generics for `svyglm.nb`-models: `family()`, `print()`, `formula()`, `model.frame()` and `predict()`. - -Bug fixes: -* Fixed error in computation of `mse()`. - -Version 0.9.0 ------------------------------------------------------------------------------- -General: -* Functions `std()` and `center()` were removed and are now in the sjmisc-package (https://cran.r-project.org/package=sjmisc). - -New functions: -* `svyglm.nb()` to compute survey-weighted negative binomial regressions. -* `xtab_statistics()` to compute various measures of assiciation for contingency tables. -* Added S3-`model.frame()`-function for `gee`-models. - -Changes to functions: -* `se()` gets a `type`-argument, which applies to generalized linear mixed models. You can now choose to compute either standard errors with delta-method approximation for fixed effects only, or standard errors for joint random and fixed effects. - -Bug fixes: -* `prop()` did not work for non-labelled data frames when used with grouped data frames. - -Version 0.8.0 ------------------------------------------------------------------------------- -New functions: -* `svy()` to compute robust standard errors for weighted models, adjusting the residual degrees of freedom to simulate sampling weights. -* `zero_count()` to check whether a poisson-model is over- or underfitting zero-counts in the outcome. -* `pred_accuracy()` to calculate accuracy of predictions from model fit. -* `outliers()` to detect outliers in (generalized) linear models. -* `heteroskedastic()` to check linear models for (non-)constant error variance. -* `autocorrelation()` to check linear models for auto-correlated residuals. -* `normality()` to check whether residuals in linear models are normally distributed or not. -* `multicollin()` to check predictors in a model for multicollinearity. -* `check_assumptions()` to run a set of model assumption checks. - -Changes to functions: -* `prop()` no longer works within dplyr's `summarise()` function. Instead, when now used with grouped data frames, a summary of proportions is directly returned as tibble. -* `se()` now computes adjusted standard errors for generalized linear (mixed) models, using the Taylor series-based delta method. - -Version 0.7.1 ------------------------------------------------------------------------------- -General: -* Package depends on R-version >= 3.3. 
- -Changes to functions: -* `prop()` gets a `digits`-argument to round the return value to a specific number of decimal places. - -Version 0.7.0 ------------------------------------------------------------------------------- -General: -* Largely revised the documentation. - -New functions: -* `prop()` to calculate proportion of values in a vector. -* `mse()` to calculate the mean square error for models. -* `robust()` to calculate robust standard errors and confidence intervals for regression models, returned as tidy data frame. - -Version 0.6.0 ------------------------------------------------------------------------------- -New functions: -* `split_half()` to compute the split-half-reliability of tests or questionnaires. -* `sd_pop()` and `var_pop()` to compute population variance and population standard deviation. - -Changes to functions: -* `se()` now also computes the standard error from estimates (regression coefficients) and p-values. - -Version 0.5.0 ------------------------------------------------------------------------------- -New functions: -* Added S3-`print`-method for `mwu()`-function. -* `get_model_pval()` to return a tidy data frame (tibble) of model term names, p-values and standard errors from various regression model types. -* `se_ybar()` to compute standard error of sample mean for mixed models, considering the effect of clustering on the standard error. -* `std()` and `center()` to standardize and center variables, supporting the pipe-operator. - -Changes to functions: -* `se()` now also computes the standard error for intraclass correlation coefficients, as returned by the `icc()`-function. -* `std_beta()` now always returns a tidy data frame (tibble) with model term names, standardized estimate, standard error and confidence intervals. -* `r2()` now also computes alternative omega-squared-statistics, if null model is given. - -Version 0.4.0 ------------------------------------------------------------------------------- -New functions: -* `inequ_trend()` to calculate proportional change of absolute and relative inequalities between two status groups for a vector of given prevalence rates. - -Changes to functions: -* `bootstrap()` is now much more memory efficient due to use of pointers. -* `boot_ci()`, `boot_se()` and `boot_p()` now accept multiple variables as input. -* `resp_val()` now also applies to models fitted with `nlme::lme()`. - -Version 0.3.0 ------------------------------------------------------------------------------- -General: -* Removed non-necessary checks for package-availability. - -New functions: -* `bootstrap()` to generate bootstrap replicates of data frames. -* `boot_ci()` to compute confidence intervals from bootstrapped values. -* `pred_vars()` to get the names of predictor variables from fitted models. -* `resp_var()` to get the name of the response variable from fitted models. -* `resp_val()` to get the values of the response vector from fitted models. - -Version 0.2.0 ------------------------------------------------------------------------------- -New functions: -* Added functions `weight()` and `weight2()` to weight vectors. -* Added functions `wtd_sd()` and `wtd_se()` to compute weighted standard deviations and standard errors. -* Added function `merMod_p()` to compute p-values for merMod-objects. - -Changes to functions: -* `r2()` now supports `plm` objects. - -Bug fixes: -* Fixed typo in print-method for `icc()`. 
- -Version 0.1.0 ------------------------------------------------------------------------------- -General: -* Initial release on CRAN. +Version 0.10.0 +------------------------------------------------------------------------------ +New functions: +* `cv_error()` and `cv_compare()` to compute the root mean squared error for test and training data from cross-validation. +* `props()` to calculate proportions in a vector, supporting multiple logical statements. +* `or_to_rr()` to convert odds ratio estimates into risk ratio estimates. +* `mn()`, `md()` and `sm()` to calculate mean, median or sum of a vector, but using `na.rm = TRUE` as default. +* S3-generics for `svyglm.nb`-models: `family()`, `print()`, `formula()`, `model.frame()` and `predict()`. + +Bug fixes: +* Fixed error in computation of `mse()`. + +Version 0.9.0 +------------------------------------------------------------------------------ +General: +* Functions `std()` and `center()` were removed and are now in the sjmisc-package (https://cran.r-project.org/package=sjmisc). + +New functions: +* `svyglm.nb()` to compute survey-weighted negative binomial regressions. +* `xtab_statistics()` to compute various measures of association for contingency tables. +* Added S3-`model.frame()`-function for `gee`-models. + +Changes to functions: +* `se()` gets a `type`-argument, which applies to generalized linear mixed models. You can now choose to compute either standard errors with delta-method approximation for fixed effects only, or standard errors for joint random and fixed effects. + +Bug fixes: +* `prop()` did not work for non-labelled data frames when used with grouped data frames. + +Version 0.8.0 +------------------------------------------------------------------------------ +New functions: +* `svy()` to compute robust standard errors for weighted models, adjusting the residual degrees of freedom to simulate sampling weights. +* `zero_count()` to check whether a poisson-model is over- or underfitting zero-counts in the outcome. +* `pred_accuracy()` to calculate accuracy of predictions from model fit. +* `outliers()` to detect outliers in (generalized) linear models. +* `heteroskedastic()` to check linear models for (non-)constant error variance. +* `autocorrelation()` to check linear models for auto-correlated residuals. +* `normality()` to check whether residuals in linear models are normally distributed or not. +* `multicollin()` to check predictors in a model for multicollinearity. +* `check_assumptions()` to run a set of model assumption checks. + +Changes to functions: +* `prop()` no longer works within dplyr's `summarise()` function. Instead, when used with grouped data frames, a summary of proportions is now directly returned as a tibble. +* `se()` now computes adjusted standard errors for generalized linear (mixed) models, using the Taylor series-based delta method. + +Version 0.7.1 +------------------------------------------------------------------------------ +General: +* Package depends on R-version >= 3.3. + +Changes to functions: +* `prop()` gets a `digits`-argument to round the return value to a specific number of decimal places. + +Version 0.7.0 +------------------------------------------------------------------------------ +General: +* Largely revised the documentation. + +New functions: +* `prop()` to calculate proportion of values in a vector. +* `mse()` to calculate the mean square error for models. 
+* `robust()` to calculate robust standard errors and confidence intervals for regression models, returned as tidy data frame. + +Version 0.6.0 +------------------------------------------------------------------------------ +New functions: +* `split_half()` to compute the split-half-reliability of tests or questionnaires. +* `sd_pop()` and `var_pop()` to compute population variance and population standard deviation. + +Changes to functions: +* `se()` now also computes the standard error from estimates (regression coefficients) and p-values. + +Version 0.5.0 +------------------------------------------------------------------------------ +New functions: +* Added S3-`print`-method for `mwu()`-function. +* `get_model_pval()` to return a tidy data frame (tibble) of model term names, p-values and standard errors from various regression model types. +* `se_ybar()` to compute standard error of sample mean for mixed models, considering the effect of clustering on the standard error. +* `std()` and `center()` to standardize and center variables, supporting the pipe-operator. + +Changes to functions: +* `se()` now also computes the standard error for intraclass correlation coefficients, as returned by the `icc()`-function. +* `std_beta()` now always returns a tidy data frame (tibble) with model term names, standardized estimate, standard error and confidence intervals. +* `r2()` now also computes alternative omega-squared-statistics, if null model is given. + +Version 0.4.0 +------------------------------------------------------------------------------ +New functions: +* `inequ_trend()` to calculate proportional change of absolute and relative inequalities between two status groups for a vector of given prevalence rates. + +Changes to functions: +* `bootstrap()` is now much more memory efficient due to use of pointers. +* `boot_ci()`, `boot_se()` and `boot_p()` now accept multiple variables as input. +* `resp_val()` now also applies to models fitted with `nlme::lme()`. + +Version 0.3.0 +------------------------------------------------------------------------------ +General: +* Removed non-necessary checks for package-availability. + +New functions: +* `bootstrap()` to generate bootstrap replicates of data frames. +* `boot_ci()` to compute confidence intervals from bootstrapped values. +* `pred_vars()` to get the names of predictor variables from fitted models. +* `resp_var()` to get the name of the response variable from fitted models. +* `resp_val()` to get the values of the response vector from fitted models. + +Version 0.2.0 +------------------------------------------------------------------------------ +New functions: +* Added functions `weight()` and `weight2()` to weight vectors. +* Added functions `wtd_sd()` and `wtd_se()` to compute weighted standard deviations and standard errors. +* Added function `merMod_p()` to compute p-values for merMod-objects. + +Changes to functions: +* `r2()` now supports `plm` objects. + +Bug fixes: +* Fixed typo in print-method for `icc()`. + +Version 0.1.0 +------------------------------------------------------------------------------ +General: +* Initial release on CRAN. 
diff --git a/NEWS.md b/NEWS.md index 11936116..72a6d2d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# sjstats 0.9.0.9000 +# sjstats 0.10.0 ## New functions diff --git a/R/se.R b/R/se.R index 6a0a9532..e952d693 100644 --- a/R/se.R +++ b/R/se.R @@ -1,292 +1,310 @@ -utils::globalVariables(c("strap", "models", "estimate")) - -#' @title Standard Error for variables or coefficients -#' @name se -#' @description Compute standard error for a variable, for all variables -#' of a data frame, for joint random and fixed effects -#' coefficients of (non-/linear) mixed models, the adjusted -#' standard errors for generalized linear (mixed) models, or -#' for intraclass correlation coefficients (ICC). -#' -#' @param x (Numeric) vector, a data frame, an \code{lm} or \code{glm}-object, -#' a \code{merMod}-object as returned by the functions from the -#' \pkg{lme4}-package, an ICC object (as obtained by the -#' \code{\link{icc}}-function) or a list with estimate and p-value. -#' For the latter case, the list must contain elements named -#' \code{estimate} and \code{p.value} (see 'Examples' and 'Details'). -#' @param nsim Numeric, the number of simulations for calculating the -#' standard error for intraclass correlation coefficients, as -#' obtained by the \code{\link{icc}}-function. -#' @param type Type of standard errors for generalized linear mixed models. -#' \code{type = "fe"} returns the standard errors for fixed effects, -#' based on the delta-method-approximation. \code{type = "se"} returns -#' the standard errors for joint random and fixed effects. See 'Details'. -#' -#' @return The standard error of \code{x}. -#' -#' @note Computation of standard errors for coefficients of mixed models -#' is based \href{http://stackoverflow.com/questions/26198958/extracting-coefficients-and-their-standard-error-from-lme}{on this code}. -#' Standard errors for generalized linear (mixed) models, if -#' \code{type = "re"}, are approximations based on the delta -#' method (Oehlert 1992). -#' \cr \cr -#' A remark on standard errors: -#' \dQuote{Standard error represents variation in the point estimate, but -#' confidence interval has usual Bayesian interpretation only with flat prior.} -#' (Gelman 2017) -#' -#' @details For linear mixed models, and generalized linear mixed models with -#' \code{type = "re"}, this function computes the standard errors -#' for joint (sums of) random and fixed effects coefficients (unlike -#' \code{\link[arm]{se.coef}}, which returns the standard error -#' for fixed and random effects separately). Hence, \code{se()} -#' returns the appropriate standard errors for \code{\link[lme4]{coef.merMod}}. -#' \cr \cr -#' For generalized linear models or generalized linear mixed models, -#' approximated standard errors, using the delta method for transformed -#' regression parameters are returned (Oehlert 1992). For generalized -#' linear mixed models, by default, the standard errors refer to the -#' fixed effects only. Use \code{type = "re"} to compute standard errors -#' for joint random and fixed effects coefficients. However, this -#' computation \emph{is not} based on the delta method, so standard -#' errors from \code{type = "re"} are on the logit-scale. -#' \cr \cr -#' The standard error for the \code{\link{icc}} is based on bootstrapping, -#' thus, the \code{nsim}-argument is required. See 'Examples'. 
-#' \cr \cr -#' \code{se()} also returns the standard error of an estimate (regression -#' coefficient) and p-value, assuming a normal distribution to compute -#' the z-score from the p-value (formula in short: \code{b / qnorm(p / 2)}). -#' See 'Examples'. -#' -#' @references Oehlert GW. 1992. A note on the delta method. American Statistician 46(1). -#' \cr \cr -#' Gelman A 2017. How to interpret confidence intervals? \url{http://andrewgelman.com/2017/03/04/interpret-confidence-intervals/} -#' -#' @examples -#' # compute standard error for vector -#' se(rnorm(n = 100, mean = 3)) -#' -#' # compute standard error for each variable in a data frame -#' data(efc) -#' se(efc[, 1:3]) -#' -#' # compute standard error for merMod-coefficients -#' library(lme4) -#' fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy) -#' se(fit) -#' -#' # compute odds-ratio adjusted standard errors, based on delta method -#' # with first-order Taylor approximation. -#' data(efc) -#' efc$services <- sjmisc::dicho(efc$tot_sc_e, dich.by = 0) -#' fit <- glm(services ~ neg_c_7 + c161sex + e42dep, -#' data = efc, family = binomial(link = "logit")) -#' se(fit) -#' -#' # compute odds-ratio adjusted standard errors for generalized -#' # linear mixed model, also based on delta method -#' library(lme4) -#' library(sjmisc) -#' # create binary response -#' sleepstudy$Reaction.dicho <- dicho(sleepstudy$Reaction, dich.by = "median") -#' fit <- glmer(Reaction.dicho ~ Days + (Days | Subject), -#' data = sleepstudy, family = binomial("logit")) -#' se(fit) -#' -#' # compute standard error from regression coefficient and p-value -#' se(list(estimate = .3, p.value = .002)) -#' -#' \dontrun{ -#' # compute standard error of ICC for the linear mixed model -#' icc(fit) -#' se(icc(fit)) -#' -#' # the standard error for the ICC can be computed manually in this way, -#' # taking the fitted model example from above -#' library(dplyr) -#' dummy <- sleepstudy %>% -#' # generate 100 bootstrap replicates of dataset -#' bootstrap(100) %>% -#' # run mixed effects regression on each bootstrap replicate -#' mutate(models = lapply(.$strap, function(x) { -#' lmer(Reaction ~ Days + (Days | Subject), data = x) -#' })) %>% -#' # compute ICC for each "bootstrapped" regression -#' mutate(icc = unlist(lapply(.$models, icc))) -#' # now compute SE and p-values for the bootstrapped ICC, values -#' # may differ from above example due to random seed -#' boot_se(dummy, icc) -#' boot_p(dummy, icc)} -#' -#' -#' @importFrom stats qnorm vcov -#' @importFrom broom tidy -#' @importFrom dplyr mutate select_ -#' @export -se <- function(x, nsim = 100, type = c("fe", "re")) { - # match arguments - type <- match.arg(type) - - if (inherits(x, c("lmerMod", "nlmerMod", "merModLmerTest"))) { - # return standard error for (linear) mixed models - return(std_merMod(x)) - } else if (inherits(x, "icc.lme4")) { - # we have a ICC object, so do bootstrapping and compute SE for ICC - return(std_e_icc(x, nsim)) - } else if (inherits(x, c("svyglm.nb", "svymle"))) { - return( - tidy_svyglm.nb(x) %>% - dplyr::select_("term", "estimate", "std.error") - ) - } else if (inherits(x, c("glm", "glmerMod"))) { - # check type of se - if (type == "fe") { - # for glm, we want to exponentiate coefficients to get odds ratios, however - # 'exponentiate'-argument currently not works for lme4-tidiers - # so we need to do this manually for glmer's - tm <- broom::tidy(x, effects = "fixed") - tm$estimate <- exp(tm$estimate) - return( - tm %>% - # vcov for merMod returns a dpoMatrix-object, so we need - # 
to coerce to regular matrix here. - dplyr::mutate(or.se = sqrt(estimate ^ 2 * diag(as.matrix(stats::vcov(x))))) %>% - dplyr::select_("term", "estimate", "or.se") %>% - sjmisc::var_rename(or.se = "std.error") - ) - } else { - # return standard error for mixed models, - # joint random and fixed effects - return(std_merMod(x)) - } - } else if (inherits(x, "lm")) { - # for convenience reasons, also return se for simple linear models - return(x %>% - broom::tidy(effects = "fixed") %>% - dplyr::select_("term", "estimate", "std.error") - ) - } else if (is.matrix(x) || is.data.frame(x)) { - # init return variables - stde <- c() - stde_names <- c() - - # iterate all columns - for (i in seq_len(ncol(x))) { - # get and save standard error for each variable - # of the data frame - stde <- c(stde, std_e_helper(x[[i]])) - # save column name as variable name - stde_names <- c(stde_names, colnames(x)[i]) - } - - # set names to return vector - names(stde) <- stde_names - - # return results - return(stde) - } else if (is.list(x)) { - # compute standard error from regression coefficient and p-value - return(x$estimate / abs(stats::qnorm(x$p.value / 2))) - } else { - # standard error for a variable - return(std_e_helper(x)) - } -} - -std_e_helper <- function(x) sqrt(var(x, na.rm = TRUE) / length(stats::na.omit(x))) - - -#' @importFrom stats coef setNames vcov -#' @importFrom lme4 ranef -std_merMod <- function(fit) { - se.merMod <- list() - - # get coefficients - cc <- stats::coef(fit) - - # get names of intercepts - inames <- names(cc) - - # variances of fixed effects - fixed.vars <- diag(as.matrix(stats::vcov(fit))) - - # extract variances of conditional modes - r1 <- lme4::ranef(fit, condVar = TRUE) - - # we may have multiple random intercepts, iterate all - for (i in seq_len(length(cc))) { - cmode.vars <- t(apply(attr(r1[[i]], "postVar"), 3, diag)) - seVals <- sqrt(sweep(cmode.vars, 2, fixed.vars, "+")) - - # add results to return list - se.merMod[[length(se.merMod) + 1]] <- stats::setNames(as.vector(seVals[1, ]), - c("intercept_se", "slope_se")) - } - - # set names of list - names(se.merMod) <- inames - return(se.merMod) -} - - - -#' @importFrom dplyr "%>%" -#' @importFrom stats model.frame -std_e_icc <- function(x, nsim) { - # check whether model is still in environment? - obj.name <- attr(x, ".obj.name", exact = T) - if (!exists(obj.name, envir = globalenv())) - stop(sprintf("Can't find merMod-object `%s` (that was used to compute the ICC) in the environment.", obj.name), call. = F) - - # get object, see whether formulas match - fitted.model <- globalenv()[[obj.name]] - model.formula <- attr(x, "formula", exact = T) - if (!identical(model.formula, formula(fitted.model))) - stop(sprintf("merMod-object `%s` was fitted with a different formula than ICC-model", obj.name), call. 
= F) - - # get model family, we may have glmer - model.family <- attr(x, "family", exact = T) - - # check for all required arguments - if (missing(nsim) || is.null(nsim)) nsim <- 100 - - # get ICC, and compute bootstrapped SE, than return both - bstr <- bootstr_icc_se(stats::model.frame(fitted.model), nsim, model.formula, model.family) - - # now compute SE and p-values for the bootstrapped ICC - res <- data.frame(model = obj.name, - icc = as.vector(x), - std.err = boot_se(bstr, icc)[["std.err"]], - p.value = boot_p(bstr, icc)[["p.value"]]) - structure(class = "se.icc.lme4", list(result = res, bootstrap_data = bstr)) -} - - - -#' @importFrom dplyr mutate -#' @importFrom lme4 lmer glmer -#' @importFrom utils txtProgressBar -bootstr_icc_se <- function(dd, nsim, formula, model.family) { - # create progress bar - pb <- utils::txtProgressBar(min = 1, max = nsim, style = 3) - - # generate bootstraps - dummy <- dd %>% - bootstrap(nsim) %>% - dplyr::mutate(models = lapply(strap, function(x) { - # update progress bar - utils::setTxtProgressBar(pb, x$resample.id) - # check model family, then compute mixed model - if (model.family == "gaussian") - lme4::lmer(formula, data = x) - else - lme4::glmer(formula, data = x, family = model.family) - })) %>% - # compute ICC for each "bootstrapped" regression - dplyr::mutate(icc = unlist(lapply(models, icc))) - - # close progresss bar - close(pb) - return(dummy) -} +utils::globalVariables(c("strap", "models", "estimate")) + +#' @title Standard Error for variables or coefficients +#' @name se +#' @description Compute standard error for a variable, for all variables +#' of a data frame, for joint random and fixed effects +#' coefficients of (non-/linear) mixed models, the adjusted +#' standard errors for generalized linear (mixed) models, or +#' for intraclass correlation coefficients (ICC). +#' +#' @param x (Numeric) vector, a data frame, an \code{lm} or \code{glm}-object, +#' a \code{merMod}-object as returned by the functions from the +#' \pkg{lme4}-package, an ICC object (as obtained by the +#' \code{\link{icc}}-function) or a list with estimate and p-value. +#' For the latter case, the list must contain elements named +#' \code{estimate} and \code{p.value} (see 'Examples' and 'Details'). +#' @param nsim Numeric, the number of simulations for calculating the +#' standard error for intraclass correlation coefficients, as +#' obtained by the \code{\link{icc}}-function. +#' @param type Type of standard errors for generalized linear mixed models. +#' \code{type = "fe"} returns the standard errors for fixed effects, +#' based on the delta-method-approximation. \code{type = "re"} returns +#' the standard errors for joint random and fixed effects. See 'Details'. +#' +#' @return The standard error of \code{x}. +#' +#' @note Computation of standard errors for coefficients of mixed models +#' is based \href{http://stackoverflow.com/questions/26198958/extracting-coefficients-and-their-standard-error-from-lme}{on this code}. +#' Standard errors for generalized linear (mixed) models, if +#' \code{type = "re"}, are approximations based on the delta +#' method (Oehlert 1992). 
+#' \cr \cr +#' A remark on standard errors: +#' \dQuote{Standard error represents variation in the point estimate, but +#' confidence interval has usual Bayesian interpretation only with flat prior.} +#' (Gelman 2017) +#' +#' @details For linear mixed models, and generalized linear mixed models with +#' \code{type = "re"}, this function computes the standard errors +#' for joint (sums of) random and fixed effects coefficients (unlike +#' \code{\link[arm]{se.coef}}, which returns the standard error +#' for fixed and random effects separately). Hence, \code{se()} +#' returns the appropriate standard errors for \code{\link[lme4]{coef.merMod}}. +#' \cr \cr +#' For generalized linear models or generalized linear mixed models, +#' approximated standard errors, using the delta method for transformed +#' regression parameters are returned (Oehlert 1992). For generalized +#' linear mixed models, by default, the standard errors refer to the +#' fixed effects only. Use \code{type = "re"} to compute standard errors +#' for joint random and fixed effects coefficients. However, this +#' computation \emph{is not} based on the delta method, so standard +#' errors from \code{type = "re"} are on the logit-scale. +#' \cr \cr +#' The standard error for the \code{\link{icc}} is based on bootstrapping, +#' thus, the \code{nsim}-argument is required. See 'Examples'. +#' \cr \cr +#' \code{se()} also returns the standard error of an estimate (regression +#' coefficient) and p-value, assuming a normal distribution to compute +#' the z-score from the p-value (formula in short: \code{b / qnorm(p / 2)}). +#' See 'Examples'. +#' +#' @references Oehlert GW. 1992. A note on the delta method. American Statistician 46(1). +#' \cr \cr +#' Gelman A 2017. How to interpret confidence intervals? \url{http://andrewgelman.com/2017/03/04/interpret-confidence-intervals/} +#' +#' @examples +#' # compute standard error for vector +#' se(rnorm(n = 100, mean = 3)) +#' +#' # compute standard error for each variable in a data frame +#' data(efc) +#' se(efc[, 1:3]) +#' +#' # compute standard error for merMod-coefficients +#' library(lme4) +#' fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy) +#' se(fit) +#' +#' # compute odds-ratio adjusted standard errors, based on delta method +#' # with first-order Taylor approximation. 
+#' data(efc) +#' efc$services <- sjmisc::dicho(efc$tot_sc_e, dich.by = 0) +#' fit <- glm(services ~ neg_c_7 + c161sex + e42dep, +#' data = efc, family = binomial(link = "logit")) +#' se(fit) +#' +#' # compute odds-ratio adjusted standard errors for generalized +#' # linear mixed model, also based on delta method +#' library(lme4) +#' library(sjmisc) +#' # create binary response +#' sleepstudy$Reaction.dicho <- dicho(sleepstudy$Reaction, dich.by = "median") +#' fit <- glmer(Reaction.dicho ~ Days + (Days | Subject), +#' data = sleepstudy, family = binomial("logit")) +#' se(fit) +#' +#' # compute standard error from regression coefficient and p-value +#' se(list(estimate = .3, p.value = .002)) +#' +#' \dontrun{ +#' # compute standard error of ICC for the linear mixed model +#' icc(fit) +#' se(icc(fit)) +#' +#' # the standard error for the ICC can be computed manually in this way, +#' # taking the fitted model example from above +#' library(dplyr) +#' dummy <- sleepstudy %>% +#' # generate 100 bootstrap replicates of dataset +#' bootstrap(100) %>% +#' # run mixed effects regression on each bootstrap replicate +#' mutate(models = lapply(.$strap, function(x) { +#' lmer(Reaction ~ Days + (Days | Subject), data = x) +#' })) %>% +#' # compute ICC for each "bootstrapped" regression +#' mutate(icc = unlist(lapply(.$models, icc))) +#' # now compute SE and p-values for the bootstrapped ICC, values +#' # may differ from above example due to random seed +#' boot_se(dummy, icc) +#' boot_p(dummy, icc)} +#' +#' +#' @importFrom stats qnorm vcov +#' @importFrom broom tidy +#' @importFrom dplyr mutate select_ +#' @export +se <- function(x, nsim = 100, type = c("fe", "re")) { + # match arguments + type <- match.arg(type) + + if (inherits(x, c("lmerMod", "nlmerMod", "merModLmerTest"))) { + # return standard error for (linear) mixed models + return(std_merMod(x)) + } else if (inherits(x, "icc.lme4")) { + # we have an ICC object, so do bootstrapping and compute SE for ICC + return(std_e_icc(x, nsim)) + } else if (inherits(x, c("svyglm.nb", "svymle"))) { + return( + tidy_svyglm.nb(x) %>% + dplyr::select_("term", "estimate", "std.error") + ) + } else if (inherits(x, c("glm", "glmerMod"))) { + # check type of se + if (type == "fe") { + # for glm, we want to exponentiate coefficients to get odds ratios, however + # 'exponentiate'-argument currently does not work for lme4-tidiers + # so we need to do this manually for glmer's + tm <- broom::tidy(x, effects = "fixed") + tm$estimate <- exp(tm$estimate) + + # # for poisson family, we need a different delta method approach + # if (get_glm_family(x)$is_pois) { + # # standard errors scaled using square root of Pearson + # # chi-squared dispersion + # pr <- sum(stats::residuals(x, type = "pearson") ^ 2) + # dispersion <- pr / x$df.residual + # sse <- sqrt(diag(as.matrix(stats::vcov(x)))) * sqrt(dispersion) + # + # return( + # tm %>% + # # vcov for merMod returns a dpoMatrix-object, so we need + # # to coerce to regular matrix here. + # dplyr::mutate(std.error = sse) %>% + # dplyr::select_("term", "estimate", "std.error") + # ) + # } + + return( + tm %>% + # vcov for merMod returns a dpoMatrix-object, so we need + # to coerce to regular matrix here. 
+ dplyr::mutate(or.se = sqrt(estimate ^ 2 * diag(as.matrix(stats::vcov(x))))) %>% + dplyr::select_("term", "estimate", "or.se") %>% + sjmisc::var_rename(or.se = "std.error") + ) + } else { + # return standard error for mixed models, + # joint random and fixed effects + return(std_merMod(x)) + } + } else if (inherits(x, "lm")) { + # for convenience reasons, also return se for simple linear models + return(x %>% + broom::tidy(effects = "fixed") %>% + dplyr::select_("term", "estimate", "std.error") + ) + } else if (is.matrix(x) || is.data.frame(x)) { + # init return variables + stde <- c() + stde_names <- c() + + # iterate all columns + for (i in seq_len(ncol(x))) { + # get and save standard error for each variable + # of the data frame + stde <- c(stde, std_e_helper(x[[i]])) + # save column name as variable name + stde_names <- c(stde_names, colnames(x)[i]) + } + + # set names to return vector + names(stde) <- stde_names + + # return results + return(stde) + } else if (is.list(x)) { + # compute standard error from regression coefficient and p-value + return(x$estimate / abs(stats::qnorm(x$p.value / 2))) + } else { + # standard error for a variable + return(std_e_helper(x)) + } +} + +std_e_helper <- function(x) sqrt(var(x, na.rm = TRUE) / length(stats::na.omit(x))) + + +#' @importFrom stats coef setNames vcov +#' @importFrom lme4 ranef +std_merMod <- function(fit) { + se.merMod <- list() + + # get coefficients + cc <- stats::coef(fit) + + # get names of intercepts + inames <- names(cc) + + # variances of fixed effects + fixed.vars <- diag(as.matrix(stats::vcov(fit))) + + # extract variances of conditional modes + r1 <- lme4::ranef(fit, condVar = TRUE) + + # we may have multiple random intercepts, iterate all + for (i in seq_len(length(cc))) { + cmode.vars <- t(apply(attr(r1[[i]], "postVar"), 3, diag)) + seVals <- sqrt(sweep(cmode.vars, 2, fixed.vars, "+")) + + # add results to return list + se.merMod[[length(se.merMod) + 1]] <- stats::setNames(as.vector(seVals[1, ]), + c("intercept_se", "slope_se")) + } + + # set names of list + names(se.merMod) <- inames + return(se.merMod) +} + + + +#' @importFrom dplyr "%>%" +#' @importFrom stats model.frame +std_e_icc <- function(x, nsim) { + # check whether model is still in environment? + obj.name <- attr(x, ".obj.name", exact = T) + if (!exists(obj.name, envir = globalenv())) + stop(sprintf("Can't find merMod-object `%s` (that was used to compute the ICC) in the environment.", obj.name), call. = F) + + # get object, see whether formulas match + fitted.model <- globalenv()[[obj.name]] + model.formula <- attr(x, "formula", exact = T) + if (!identical(model.formula, formula(fitted.model))) + stop(sprintf("merMod-object `%s` was fitted with a different formula than ICC-model", obj.name), call. 
= F) + + # get model family, we may have glmer + model.family <- attr(x, "family", exact = T) + + # check for all required arguments + if (missing(nsim) || is.null(nsim)) nsim <- 100 + + # get ICC, and compute bootstrapped SE, then return both + bstr <- bootstr_icc_se(stats::model.frame(fitted.model), nsim, model.formula, model.family) + + # now compute SE and p-values for the bootstrapped ICC + res <- data.frame(model = obj.name, + icc = as.vector(x), + std.err = boot_se(bstr, icc)[["std.err"]], + p.value = boot_p(bstr, icc)[["p.value"]]) + structure(class = "se.icc.lme4", list(result = res, bootstrap_data = bstr)) +} + + + +#' @importFrom dplyr mutate +#' @importFrom lme4 lmer glmer +#' @importFrom utils txtProgressBar +bootstr_icc_se <- function(dd, nsim, formula, model.family) { + # create progress bar + pb <- utils::txtProgressBar(min = 1, max = nsim, style = 3) + + # generate bootstraps + dummy <- dd %>% + bootstrap(nsim) %>% + dplyr::mutate(models = lapply(strap, function(x) { + # update progress bar + utils::setTxtProgressBar(pb, x$resample.id) + # check model family, then compute mixed model + if (model.family == "gaussian") + lme4::lmer(formula, data = x) + else + lme4::glmer(formula, data = x, family = model.family) + })) %>% + # compute ICC for each "bootstrapped" regression + dplyr::mutate(icc = unlist(lapply(models, icc))) + + # close progress bar + close(pb) + return(dummy) +}
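Note (not part of the patch): the two closed-form computations the patched `se()` relies on can be checked with a short, self-contained R sketch. The formulas are taken directly from the code above (`sqrt(estimate^2 * diag(vcov(x)))` applied to exponentiated coefficients, and `estimate / qnorm(p.value / 2)` for the estimate/p-value case); the simulated data and variable names below are illustrative assumptions only.

# Illustrative sketch only -- mirrors the formulas used in se(), not the sjstats API.

# 1) Delta-method (first-order Taylor) standard errors for odds ratios from a logistic model
set.seed(1)
d <- data.frame(y = rbinom(100, 1, 0.4), x1 = rnorm(100), x2 = rnorm(100))
fit <- glm(y ~ x1 + x2, data = d, family = binomial(link = "logit"))
or <- exp(coef(fit))                   # odds ratios
or.se <- sqrt(or^2 * diag(vcov(fit)))  # SE on the odds-ratio scale
cbind(or, or.se)

# 2) Standard error recovered from an estimate and a two-sided p-value,
#    assuming a normal test statistic: b / qnorm(p / 2)
estimate <- 0.3
p.value <- 0.002
estimate / abs(qnorm(p.value / 2))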