Skip to content

Commit

Permalink
Merge branch 'fh_integration' of https://github.com/SoerenPannier/emdi
Browse files Browse the repository at this point in the history
…into fh_integration
  • Loading branch information
SoerenPannier committed Jun 25, 2020
2 parents 77e9b49 + c2f570b commit a1893a3
Show file tree
Hide file tree
Showing 39 changed files with 452 additions and 543 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ S3method(print,emdi)
S3method(print,estimators.emdi)
S3method(print,step)
S3method(print,summary.emdi)
S3method(step,default)
S3method(step,fh)
S3method(subset,estimators.emdi)
S3method(summary,emdi)
Expand Down Expand Up @@ -130,6 +131,7 @@ importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,shapiro.test)
importFrom(stats,step)
importFrom(stats,terms)
importFrom(stats,update)
importFrom(stats,update.formula)
Expand Down
50 changes: 29 additions & 21 deletions R/FH.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@
#' \code{NULL}, seed is chosen randomly. Defaults to \code{123}.
#' @return An object of class "fh", "model" and "emdi" that provides estimators
#' for regional disaggregated indicators like means and ratios and optionally
#' corresponding MSE estimates. Generic functions such as \code{\link{estimators}},
#' \code{\link{print}}, \code{\link{plot}} and \code{\link{summary}} have methods
#' corresponding MSE estimates. Generic functions such as \code{\link{compare}},
#' \code{\link{compare_plot}}, \code{\link{estimators}}, \code{\link{print}},
#' \code{\link{plot}}, \code{\link{step}} and \code{\link{summary}} have methods
#' that can be used to obtain further information. Additionally, for the standard
#' Fay-Herriot model that is estimated via ML variance estimation a model selection
#' function is provided (\code{\link{step}}). See \code{\link{emdiObject}} for
Expand All @@ -162,9 +163,12 @@
#' \code{bc_sm} backtransformation and for the robust models. \cr
#' Out-of-sample MSEs are available for the analytical MSE estimator of the
#' standard Fay-Herriot model with reml and ml variance estimation, the crude
#' backtransformation in case of log transformation, the bootstrap MSE estimator
#' for the arcsin transformation and for the nonparametric
#' bootstrap estimator within the spatial Fay-Herriot model framework.
#' backtransformation in case of log transformation and the bootstrap MSE estimator
#' for the arcsin transformation. \cr \cr
#' For a description of how to create the proximity matrix for the
#' spatial Fay-Herriot model, see the package vignette. If the presence
#' of out-of-sample domains, the proximity matrix needs to be
#' subsetted to the in-sample domains.
#' @references
#' Chen S., Lahiri P. (2002), A weighted jackknife MSPE estimator in small-area
#' estimation, "Proceeding of the Section on Survey Research Methods", American
Expand Down Expand Up @@ -220,7 +224,7 @@
#' data("eusilcA_popAgg")
#' data("eusilcA_smpAgg")
#'
#' # Combine sample and population data -------------------------------------------
#' # Combine sample and population data
#' combined_data <- combine_data(pop_data = eusilcA_popAgg, pop_domains = "Domain",
#' smp_data = eusilcA_smpAgg, smp_domains = "Domain")
#'
Expand All @@ -236,8 +240,8 @@
#' eff_smpsize = "n", MSE = TRUE, mse_type = "boot", B = 50)
#'
#' # Example 3: Spatial Fay-Herriot model
#' # For the creation of eusilcA_proxmat, please refer to the package
#' # vignette.
#' # Load proximity matrix
#' data("eusilcA_prox")
#' fh_spatial <- fh(fixed = Mean ~ cash + self_empl, vardir = "Var_Mean",
#' combined_data = combined_data, domains = "Domain", method = "reml",
#' correlation = "spatial", corMatrix = eusilcA_prox, MSE = TRUE,
Expand Down Expand Up @@ -314,10 +318,6 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
corMatrix = corMatrix, Ci = Ci, tol = tol,
maxit = maxit)

if (is.null(domains)) {
combined_data$Domain <- 1:nrow(combined_data)
framework$domains <- "Domain"
}

# Limits for interval
if (is.null(interval)) {
Expand All @@ -337,14 +337,14 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
eblup <- eblup_FH(framework = framework, sigmau2 = sigmau2,
combined_data = combined_data)

Gamma <- data.frame(Domain = framework$data[[framework$domains]],
Gamma <- data.frame(Domain = framework$combined_data[[framework$domains]],
Gamma = as.numeric(eblup$gamma))
# Criteria for model selection -----------------------------------------
criteria <- model_select(framework = framework, sigmau2 = sigmau2,
method = method, interval = interval,
eblup = eblup, B = B, vardir = vardir,
transformation = transformation,
combined_data = combined_data)
combined_data = framework$combined_data)
}
if ((method == "ml" | method == "reml") & correlation == "spatial"){
# Spatial EBLUP --------------------------------------------------------
Expand All @@ -355,14 +355,20 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
method = method, interval = interval,
eblup = eblup, B = B, vardir = vardir,
transformation = transformation,
combined_data = combined_data)
combined_data = framework$combined_data)
}
} else if (method == "me") {
# Standard EBLUP ---------------------------------------------------------
eblup <- eblup_YL(framework = framework, sigmau2 = sigmau2,
combined_data = combined_data)
Gamma <- data.frame(Domain = framework$data[[framework$domains]],
Gamma <- data.frame(Domain = framework$combined_data[[framework$domains]],
Gamma = as.numeric(eblup$gamma))
# Criteria for model selection -----------------------------------------
criteria <- model_select(framework = framework, sigmau2 = sigmau2,
method = method, interval = interval,
eblup = eblup, B = B, vardir = vardir,
transformation = transformation,
combined_data = framework$combined_data)
}


Expand All @@ -371,7 +377,8 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",

# Analytical MSE
if (MSE == TRUE) {
MSE_data <- wrapper_MSE(framework = framework, combined_data = combined_data,
MSE_data <- wrapper_MSE(framework = framework,
combined_data = framework$combined_data,
sigmau2 = sigmau2, vardir = vardir, Ci = Ci,
eblup = eblup, transformation = transformation,
method = method, interval = interval,
Expand Down Expand Up @@ -495,7 +502,7 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
real_residuals = eblup$real_res,
std_real_residuals = eblup$std_real_res,
gamma = Gamma,
# model_select = NULL,
model_select = criteria,
correlation = correlation),
framework = framework[c("direct", "vardir", "N_dom_smp",
"N_dom_unobs")],
Expand All @@ -522,7 +529,7 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
}

# Shrinkage factor
Gamma <- data.frame(Domain = framework$data[[framework$domains]],
Gamma <- data.frame(Domain = framework$combined_data[[framework$domains]],
Gamma = as.numeric(eblup$gamma))

# Back-transformation
Expand All @@ -531,7 +538,7 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",
eblup = eblup,
transformation = transformation,
backtransformation = backtransformation,
combined_data = combined_data,
combined_data = framework$combined_data,
method = method, interval = interval,
MSE = MSE,
mse_type = mse_type,
Expand Down Expand Up @@ -569,7 +576,8 @@ fh <- function(fixed, vardir, combined_data, domains = NULL, method = "reml",

# MSE ----------------------------------------------------------------------
if (MSE == TRUE) {
MSE_data <- wrapper_MSE(framework = framework, combined_data = combined_data,
MSE_data <- wrapper_MSE(framework = framework,
combined_data = framework$combined_data,
vardir = vardir, eblup = eblup,
mse_type = mse_type, method = method, B = B)
MSE <- MSE_data$MSE_data
Expand Down
4 changes: 2 additions & 2 deletions R/benchmark.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#' data("eusilcA_popAgg")
#' data("eusilcA_smpAgg")
#'
#' # Combine sample and population data -------------------------------------------
#' # Combine sample and population data
#' combined_data <- combine_data(pop_data = eusilcA_popAgg, pop_domains = "Domain",
#' smp_data = eusilcA_smpAgg, smp_domains = "Domain")
#'
Expand Down Expand Up @@ -96,7 +96,7 @@ benchmark <- function(object, benchmark, share, type = "raking", overwrite = FAL
# fixed = object$fixed,
# call = object$call,
# successful_bootstraps = object$successful_bootstraps)
warning("Please note that only point estimates are benchmarked. Thus, the
cat("Please note that only point estimates are benchmarked. Thus, the
MSE element in the new emdi object is NULL.")
result <- object
}
Expand Down
20 changes: 20 additions & 0 deletions R/check_fh_arguments.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,26 @@ fh_check <- function(fixed, vardir, combined_data, domains, method, interval,k,
proximity matrix. See also help(fh). A description how a proximity matrix
can be computed can be found in the vignette.')
}
if (correlation == "spatial" &&
(dim(corMatrix)[1] != dim(corMatrix)[2])) {
stop('The columns and rows of corMatrix must have the same lengths.
See also help(fh). A description how a proximity matrix
can be computed can be found in the vignette.')
}
direct <- makeXY(fixed, combined_data)$y
if (correlation == "spatial" &&
(dim(corMatrix)[1] != length(direct))) {
stop('The columns and rows of corMatrix must have the same lengths
like the number of areas. If out-of-sample areas exist, the columns
and rows of corMatrix must have the same lengths like the number
of in-sample areas. See also help(fh). A description how a proximity matrix
can be computed can be found in the vignette.')
}
if (correlation == "spatial" && (all(corMatrix == 0))) {
stop('The elements of corMatrix are all equal to 0. No
neighbourhood structure can be identified. It is suggested to
apply a standard Fay-Herriot model.')
}
estcoef <- makeXY(formula = fixed, data = combined_data)
if (method == "me" && !is.null(Ci) &&
(!(dim(Ci)[1] == dim(estcoef$x)[2]) ||
Expand Down
31 changes: 11 additions & 20 deletions R/compare.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,35 @@
#' Compare function
#'
#' Function \code{compare} is a generic function used to assess the quality of
#' the model-based estimates by comparing them with the direct estimates based
#' on a goodness-of-fit test proposed by Brown et al. (2001) and by computing
#' the correlation between the regression-synthetic part of the model and the
#' direct estimates (Chandra et al. 2015).
#' the model-based estimates by comparing them with the direct estimates.
#'
#' @param model an object of type "emdi","model".
#' @param ... further arguments passed to or from other methods.
#' @return The null hypothesis, the value W of the test statistic, the degrees
#' of freedom and the p value of the Brown test and the correlation coefficient
#' of the synthetic part and the direct estimator.
#' @references
#' Brown, G., Chambers, R., Heady, P., and Heasman, D. (2001), Evaluation of small
#' area estimation methods: An application to unemployment estimates from the UK
#' LFS, Symposium 2001 - Achieving Data Quality in a Statistical Agency: A
#' Methodological Perspective, Statistics Canada. \cr \cr
#' Chandra, H., Salvati, N. and Chambers, R. (2015), A Spatially
#' Nonstationary Fay-Herriot Model for Small Area Estimation, Journal
#' of the Survey Statistics and Methodology, 3, 109-135.
#' @return The return of \code{compare} depends on the class of its argument. The
#' documentation of particular methods gives detailed information about the
#' return of that method.
#' @export

compare <- function(model, ...) UseMethod("compare")


#' Compare function
#'
#' Method \code{compare.fh} assesses the quality of the model-based estimates by
#' comparing them with the direct estimates based on a goodness-of-fit test
#' proposed by Brown et al. (2001) and by computing the correlation between the
#' proposed by \cite{Brown et al. (2001)} and by computing the correlation between the
#' regression-synthetic part of the Fay-Herriot model and the direct estimates.
#'
#' @param model an object of type "model","fh".
#' @param ... further arguments passed to or from other methods.
#' @return The null hypothesis, the value W of the test statistic, the degrees
#' of freedom and the p value of the Brown test; And the correlation coefficient
#' of the synthetic part and the direct estimator (Chandra et al. 2015).
#' of freedom and the p value of the Brown test; and the correlation coefficient
#' of the synthetic part and the direct estimator \cite{(Chandra et al. 2015)}.
#' @references
#' Brown, G., R. Chambers, P. Heady, and D. Heasman (2001). Evaluation of small
#' area estimation methods: An application to unemployment estimates from the UK
#' LFS. Symposium 2001 - Achieving Data Quality in a Statistical Agency: A
#' Methodological Perspective, Statistics Canada.
#' Methodological Perspective, Statistics Canada. \cr \cr
#' Chandra, H., Salvati, N. and Chambers, R. (2015), A Spatially
#' Nonstationary Fay-Herriot Model for Small Area Estimation, Journal
#' of the Survey Statistics and Methodology, 3, 109-135.
Expand Down Expand Up @@ -102,7 +93,7 @@ compare.fh <- function(model, ...){

if (model$framework$N_dom_unobs > 0) {
cat("Please note that the computation of both test statistics is only based
on in-sample domains.")
on in-sample domains.","\n")
}
return(results)
}
Expand Down
Loading

0 comments on commit a1893a3

Please sign in to comment.