diff --git a/NAMESPACE b/NAMESPACE index d5e8f2778..a64aaed84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand S3method("+",dist_spec) +S3method(c,dist_spec) +S3method(max,dist_spec) S3method(mean,dist_spec) S3method(plot,dist_spec) S3method(plot,epinow) @@ -10,9 +12,15 @@ S3method(plot,estimate_truncation) S3method(print,dist_spec) S3method(summary,epinow) S3method(summary,estimate_infections) +export(Fixed) +export(Gamma) +export(LogNormal) +export(NonParametric) +export(Normal) export(R_to_growth) export(add_day_of_week) export(adjust_infection_to_report) +export(apply_tolerance) export(backcalc_opts) export(bootstrapped_dist_fit) export(calc_CrI) @@ -20,6 +28,7 @@ export(calc_CrIs) export(calc_summary_measures) export(calc_summary_stats) export(clean_nowcasts) +export(collapse) export(construct_output) export(convert_to_logmean) export(convert_to_logsd) @@ -35,6 +44,8 @@ export(create_shifted_cases) export(create_stan_args) export(create_stan_data) export(delay_opts) +export(discretise) +export(discretize) export(dist_fit) export(dist_skel) export(dist_spec) @@ -195,16 +206,19 @@ importFrom(posterior,mcse_mean) importFrom(progressr,progressor) importFrom(progressr,with_progress) importFrom(purrr,compact) +importFrom(purrr,flatten) importFrom(purrr,keep) importFrom(purrr,list_transpose) importFrom(purrr,map) importFrom(purrr,map2_dbl) importFrom(purrr,map_chr) importFrom(purrr,map_dbl) +importFrom(purrr,map_dfc) importFrom(purrr,pmap_dbl) importFrom(purrr,quietly) importFrom(purrr,reduce) importFrom(purrr,safely) +importFrom(purrr,transpose) importFrom(purrr,walk) importFrom(rlang,abort) importFrom(rlang,arg_match) diff --git a/NEWS.md b/NEWS.md index ece933c53..e613395f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ * `simulate_infections` has been renamed to `forecast_infections` in line with `simulate_secondary` and `forecast_secondary`. The terminology is: a forecast is done from a fit to existing data, a simulation from first principles. By @sbfnk in #544 and reviewed by @seabbs. * A new `simulate_infections` function has been added that can be used to simulate from the model from given initial conditions and parameters. By @sbfnk in #557 and reviewed by @jamesmbaazam. * The function `init_cumulative_fit()` has been deprecated. By @jamesmbaazam in #541 and reviewed by @sbfnk. +* The interface to generating delay distributions has been completely overhauled. Instead of calling `dist_spec()` users now specify distributions using functions that represent the available distributions, i.e. `LogNormal()`, `Gamma()` and `Fixed()`. Uncertainty is specified using calls of the same nature, to `Normal()`. More information on the underlying design can be found in `inst/dev/design_dist.md` By @sbfnk in #504 and reviewed by @seabbs. ## Documentation diff --git a/R/adjust.R b/R/adjust.R index d1352689c..87ded8c27 100644 --- a/R/adjust.R +++ b/R/adjust.R @@ -85,6 +85,7 @@ adjust_infection_to_report <- function(infections, delay_defs, # Reset DT Defaults on Exit set_dt_single_thread() + ## deprecated sample_single_dist <- function(input, delay_def) { ## Define sample delay fn sample_delay_fn <- function(n, ...) { @@ -111,14 +112,50 @@ adjust_infection_to_report <- function(infections, delay_defs, return(out) } - report <- sample_single_dist(infections, delay_defs[[1]]) - - if (length(delay_defs) > 1) { - for (def in 2:length(delay_defs)) { - report <- sample_single_dist(report, delay_defs[[def]]) + sample_dist_spec <- function(input, delay_def) { + ## Define sample delay fn + sample_delay_fn <- function(n, dist, cum, ...) { + fixed_dist <- discretise(fix_dist(delay_def, strategy = "sample")) + if (dist) { + fixed_dist[[1]]$pmf[n + 1] + } else { + sample(seq_along(fixed_dist[[1]]$pmf) - 1, size = n, replace = TRUE) + } } + + ## Infection to onset + out <- EpiNow2::sample_approx_dist( + cases = input, + dist_fn = sample_delay_fn, + max_value = max(delay_def), + direction = "forwards", + type = type, + truncate_future = FALSE + ) + + return(out) } + if (is(delay_defs, "dist_spec")) { + report <- sample_dist_spec(infections, extract_single_dist(delay_defs, 1)) + if (length(delay_defs) > 1) { + for (def in seq(2, length(delay_defs))) { + report <- sample_dist_spec(report, extract_single_dist(delay_defs, def)) + } + } + } else { + deprecate_warn( + "1.5.0", + "adjust_infection_to_report(delay_defs = 'should be a dist_spec')", + details = "Specifying this as a list of data tables is deprecated." + ) + report <- sample_single_dist(infections, delay_defs[[1]]) + if (length(delay_defs) > 1) { + for (def in 2:length(delay_defs)) { + report <- sample_single_dist(report, delay_defs[[def]]) + } + } + } ## Add a weekly reporting effect if present if (!missing(reporting_effect)) { reporting_effect <- data.table::data.table( @@ -146,3 +183,188 @@ adjust_infection_to_report <- function(infections, delay_defs, } return(report) } + +#' Approximate Sampling a Distribution using Counts +#' +#' @description `r lifecycle::badge("soft-deprecated")` +#' Convolves cases by a PMF function. This function will soon be removed or +#' replaced with a more robust stan implementation. +#' +#' @param cases A `` of cases (in date order) with the following +#' variables: `date` and `cases`. +#' +#' @param max_value Numeric, maximum value to allow. Defaults to 120 days +#' +#' @param direction Character string, defato "backwards". Direction in which to +#' map cases. Supports either "backwards" or "forwards". +#' +#' @param dist_fn Function that takes two arguments with the first being +#' numeric and the second being logical (and defined as `dist`). Should return +#' the probability density or a sample from the defined distribution. See +#' the examples for more. +#' +#' @param earliest_allowed_mapped A character string representing a date +#' ("2020-01-01"). Indicates the earliest allowed mapped value. +#' +#' @param type Character string indicating the method to use to transform +#' counts. Supports either "sample" which approximates sampling or "median" +#' would shift by the median of the distribution. +#' +#' @param truncate_future Logical, should cases be truncated if they occur +#' after the first date reported in the data. Defaults to `TRUE`. +#' +#' @return A `` of cases by date of onset +#' @export +#' @importFrom purrr map_dfc +#' @importFrom data.table data.table setorder +#' @importFrom lubridate days +#' @examples +#' \donttest{ +#' cases <- example_confirmed +#' cases <- cases[, cases := as.integer(confirm)] +#' print(cases) +#' +#' # total cases +#' sum(cases$cases) +#' +#' delay_fn <- function(n, dist, cum) { +#' if (dist) { +#' pgamma(n + 0.9999, 2, 1) - pgamma(n - 1e-5, 2, 1) +#' } else { +#' as.integer(rgamma(n, 2, 1)) +#' } +#' } +#' +#' onsets <- sample_approx_dist( +#' cases = cases, +#' dist_fn = delay_fn +#' ) +#' +#' # estimated onset distribution +#' print(onsets) +#' +#' # check that sum is equal to reported cases +#' total_onsets <- median( +#' purrr::map_dbl( +#' 1:10, +#' ~ sum(sample_approx_dist( +#' cases = cases, +#' dist_fn = delay_fn +#' )$cases) +#' ) +#' ) +#' total_onsets +#' +#' +#' # map from onset cases to reported +#' reports <- sample_approx_dist( +#' cases = cases, +#' dist_fn = delay_fn, +#' direction = "forwards" +#' ) +#' +#' +#' # map from onset cases to reported using a mean shift +#' reports <- sample_approx_dist( +#' cases = cases, +#' dist_fn = delay_fn, +#' direction = "forwards", +#' type = "median" +#' ) +#' } +sample_approx_dist <- function(cases = NULL, + dist_fn = NULL, + max_value = 120, + earliest_allowed_mapped = NULL, + direction = "backwards", + type = "sample", + truncate_future = TRUE) { + if (type == "sample") { + if (direction == "backwards") { + direction_fn <- rev + } else if (direction == "forwards") { + direction_fn <- function(x) { + x + } + } + # reverse cases so starts with current first + reversed_cases <- direction_fn(cases$cases) + reversed_cases[is.na(reversed_cases)] <- 0 + # draw from the density fn of the dist + draw <- dist_fn(0:max_value, dist = TRUE, cum = FALSE) + + # approximate cases + mapped_cases <- do.call(cbind, purrr::map( + seq_along(reversed_cases), + ~ c( + rep(0, . - 1), + stats::rbinom( + length(draw), + rep(reversed_cases[.], length(draw)), + draw + ), + rep(0, length(reversed_cases) - .) + ) + )) + + + # set dates order based on direction mapping + if (direction == "backwards") { + dates <- seq(min(cases$date) - lubridate::days(length(draw) - 1), + max(cases$date), + by = "days" + ) + } else if (direction == "forwards") { + dates <- seq(min(cases$date), + max(cases$date) + lubridate::days(length(draw) - 1), + by = "days" + ) + } + + # summarises movements and sample for placement of non-integer cases + case_sum <- direction_fn(rowSums(mapped_cases)) + floor_case_sum <- floor(case_sum) + sample_cases <- floor_case_sum + + as.numeric((runif(seq_along(case_sum)) < (case_sum - floor_case_sum))) + + # summarise imputed onsets and build output data.table + mapped_cases <- data.table::data.table( + date = dates, + cases = sample_cases + ) + + # filter out all zero cases until first recorded case + mapped_cases <- data.table::setorder(mapped_cases, date) + mapped_cases <- mapped_cases[ + , + cum_cases := cumsum(cases) + ][cum_cases != 0][, cum_cases := NULL] + } else if (type == "median") { + shift <- as.integer( + median(as.integer(dist_fn(1000, dist = FALSE)), na.rm = TRUE) + ) + + if (direction == "backwards") { + mapped_cases <- data.table::copy(cases)[ + , + date := date - lubridate::days(shift) + ] + } else if (direction == "forwards") { + mapped_cases <- data.table::copy(cases)[ + , + date := date + lubridate::days(shift) + ] + } + } + + if (!is.null(earliest_allowed_mapped)) { + mapped_cases <- mapped_cases[date >= as.Date(earliest_allowed_mapped)] + } + + # filter out future cases + if (direction == "forwards" && truncate_future) { + max_date <- max(cases$date) + mapped_cases <- mapped_cases[date <= max_date] + } + return(mapped_cases) +} diff --git a/R/checks.R b/R/checks.R index b25a372bd..ac590a10e 100644 --- a/R/checks.R +++ b/R/checks.R @@ -53,3 +53,53 @@ check_reports_valid <- function(reports, model) { assert_numeric(reports$confirm, lower = 0) } } + +#' Validate probability distribution for passing to stan +#' +#' @description +#' `check_stan_delay()` checks that the supplied data is a ``, +#' that it is a supported distribution, and that is has a finite maximum. +#' +#' @param dist A `dist_spec` object.` +#' @importFrom checkmate assert_class +#' @importFrom rlang arg_match +#' @return Called for its side effects. +#' @keywords internal +check_stan_delay <- function(dist) { + # Check that `dist` is a `dist_spec` + assert_class(dist, "dist_spec") + # Check that `dist` is lognormal or gamma or nonparametric + distributions <- vapply(dist, function(x) x$distribution, character(1)) + if ( + !all(distributions %in% c("lognormal", "gamma", "fixed", "nonparametric")) + ) { + stop( + "Distributions passed to the model need to be lognormal, gamma, fixed ", + "or nonparametric." + ) + } + # Check that `dist` has parameters that are either numeric or normal + # distributions with numeric parameters and infinite maximum + numeric_parameters <- vapply(dist$parameters, is.numeric, logical(1)) + normal_parameters <- vapply( + dist$parameters, + function(x) { + is(x, "dist_spec") && + x$distribution == "normal" && + all(vapply(x$parameters, is.numeric, logical(1))) && + is.infinite(x$max) + }, + logical(1) + ) + if (!all(numeric_parameters | normal_parameters)) { + stop( + "Delay distributions passed to the model need to have parameters that ", + "are either numeric or normally distributed with numeric parameters ", + "and infinite maximum." + ) + } + # Check that `dist` has a finite maximum + if (any(is.infinite(max(dist)))) { + stop("All distribution passed to the model need to have a finite maximum") + } +} diff --git a/R/create.R b/R/create.R index b5ead963f..410ee451f 100644 --- a/R/create.R +++ b/R/create.R @@ -516,6 +516,24 @@ create_stan_data <- function(reported_cases, seeding_time, return(data) } +##' Create initial conditions for delays +##' +##' @inheritParams create_initial_conditions +##' @return A list of initial conditions for delays +##' @keywords internal +create_delay_inits <- function(data) { + out <- list() + if (data$delay_n_p > 0) { + out$delay_params <- array(truncnorm::rtruncnorm( + n = data$delay_params_length, a = data$delay_params_lower, + mean = data$delay_params_mean, sd = data$delay_params_sd * 0.1 + )) + } else { + out$delay_params <- array(numeric(0)) + } + return(out) +} + #' Create Initial Conditions Generating Function #' @description `r lifecycle::badge("stable")` #' Uses the output of [create_stan_data()] to create a function which can be @@ -530,23 +548,7 @@ create_stan_data <- function(reported_cases, seeding_time, #' @export create_initial_conditions <- function(data) { init_fun <- function() { - out <- list() - if (data$delay_n_p > 0) { - lower_bounds <- rep(-Inf, data$delay_n_p) - ## gamma - lower_bounds[data$dist == 1] <- 0 - out$delay_mean <- array(truncnorm::rtruncnorm( - n = data$delay_n_p, a = lower_bounds, - mean = data$delay_mean_mean, sd = data$delay_mean_sd * 0.1 - )) - out$delay_sd <- array(truncnorm::rtruncnorm( - n = data$delay_n_p, a = 0, - mean = data$delay_sd_mean, sd = data$delay_sd_sd * 0.1 - )) - } else { - out$delay_mean <- array(numeric(0)) - out$delay_sd <- array(numeric(0)) - } + out <- create_delay_inits(data) if (data$fixed == 0) { out$eta <- array(rnorm(data$M, mean = 0, sd = 0.1)) @@ -688,29 +690,51 @@ create_stan_args <- function(stan = stan_opts(), ##' Create delay variables for stan ##' -##' @param ... Named delay distributions specified using `dist_spec()`. -##' The names are assigned to IDs +##' @param ... Named delay distributions. The names are assigned to IDs ##' @param weight Numeric, weight associated with delay priors; default: 1 ##' @return A list of variables as expected by the stan model -##' @importFrom purrr list_transpose map +##' @importFrom purrr transpose map flatten create_stan_delays <- function(..., weight = 1) { - dot_args <- list(...) - ## combine delays - combined_delays <- unclass(c(...)) + ## discretise + delays <- map(list(...), discretise) + ## convolve where appropriate + delays <- map(delays, collapse) + ## apply tolerance + delays <- map(delays, function(x) { + apply_tolerance(x, tolerance = attr(x, "tolerance")) + }) + ## get maximum delays + max_delay <- unname(as.numeric(flatten(map(delays, max)))) ## number of different non-empty types - type_n <- unlist(purrr::list_transpose(dot_args, simplify = FALSE)$n) + type_n <- lengths(delays) ## assign ID values to each type ids <- rep(0L, length(type_n)) ids[type_n > 0] <- seq_len(sum(type_n > 0)) names(ids) <- paste(names(type_n), "id", sep = "_") - ## start consructing stan object - ret <- unclass(combined_delays) - ## construct additional variables - ret <- c(ret, list( - types = sum(type_n > 0), - types_p = array(1L - combined_delays$fixed) + delays <- flatten(delays) + parametric <- unname( + vapply(delays, function(x) x$distribution != "nonparametric", logical(1)) + ) + param_length <- unname(vapply(delays[parametric], function(x) { + length(x$parameters) + }, numeric(1))) + nonparam_length <- unname(vapply(delays[!parametric], function(x) { + length(x$pmf) + }, numeric(1))) + distributions <- unname(as.character( + map(delays[parametric], ~ .x$distribution) )) + + ## create stan object + ret <- list( + n = length(delays), + n_p = sum(parametric), + n_np = sum(!parametric), + types = sum(type_n > 0), + types_p = array(as.integer(parametric)) + ) + ## delay identifiers ret$types_id <- integer(0) ret$types_id[ret$types_p == 1] <- seq_len(ret$n_p) @@ -718,20 +742,36 @@ create_stan_delays <- function(..., weight = 1) { ret$types_id <- array(ret$types_id) ## map delays to identifiers ret$types_groups <- array(c(0, cumsum(unname(type_n[type_n > 0]))) + 1) + + ret$params_mean <- array(unname(as.numeric( + map(flatten(map(delays[parametric], ~ .x$parameters)), mean) + ))) + ret$params_sd <- array(unname(as.numeric( + map(flatten(map(delays[parametric], ~ .x$parameters)), sd_dist) + ))) + ret$max <- array(max_delay[parametric]) + + ret$np_pmf <- array(unname(as.numeric( + flatten(map(delays[!parametric], ~ .x$pmf)) + ))) ## get non zero length delay pmf lengths - ret$np_pmf_groups <- array( - c(0, cumsum( - combined_delays$np_pmf_length[combined_delays$np_pmf_length > 0]) - ) + 1 - ) + ret$np_pmf_groups <- array(c(0, cumsum(nonparam_length)) + 1) ## calculate total np pmf length - ret$np_pmf_length <- sum(combined_delays$np_pmf_length) + ret$np_pmf_length <- sum(nonparam_length) + ## get non zero length param lengths + ret$params_groups <- array(c(0, cumsum(param_length)) + 1) + ## calculate total param length + ret$params_length <- sum(param_length) + ## set lower bounds + ret$params_lower <- array(unname(as.numeric(flatten( + map(delays[parametric], function(x) { + lower_bounds(x$distribution)[names(x$parameters)] + }) + )))) ## assign prior weights ret$weight <- array(rep(weight, ret$n_p)) ## assign distribution - ret$dist <- array(match(c(ret$dist), c("lognormal", "gamma")) - 1L) - ## remove auxiliary variables - ret$fixed <- NULL + ret$dist <- array(match(distributions, c("lognormal", "gamma")) - 1L) names(ret) <- paste("delay", names(ret), sep = "_") ret <- c(ret, ids) diff --git a/R/deprecated.R b/R/deprecated.R new file mode 100644 index 000000000..9c9bba0db --- /dev/null +++ b/R/deprecated.R @@ -0,0 +1,338 @@ +#' Generate a Gamma Distribution Definition Based on Parameter Estimates +#' +#' @description `r lifecycle::badge("deprecated")` +#' Deprecated; use [Gamma()] instead to define a gamma distribution. +#' +#' @param shape Numeric, shape parameter of the gamma distribution. +#' +#' @param shape_sd Numeric, standard deviation of the shape parameter. +#' +#' @param scale Numeric, scale parameter of the gamma distribution. +#' +#' @param scale_sd Numeric, standard deviation of the scale parameter. +#' +#' @param samples Numeric, number of sample distributions to generate. +#' +#' @importFrom truncnorm rtruncnorm +#' @return A `` defining the distribution as used by [dist_skel()] +#' @export +#' @inheritParams dist_skel +#' @inheritParams lognorm_dist_def +#' @examples +#' # using estimated shape and scale +#' def <- gamma_dist_def( +#' shape = 5.807, shape_sd = 0.2, +#' scale = 0.9, scale_sd = 0.05, +#' max_value = 20, samples = 10 +#' ) +#' print(def) +#' def$params[[1]] +#' +#' # using mean and sd +#' def <- gamma_dist_def( +#' mean = 3, mean_sd = 0.5, +#' sd = 3, sd_sd = 0.1, +#' max_value = 20, samples = 10 +#' ) +#' print(def) +#' def$params[[1]] +gamma_dist_def <- function(shape, shape_sd, + scale, scale_sd, + mean, mean_sd, + sd, sd_sd, + max_value, samples) { + lifecycle::deprecate_warn( + "1.5.0", "gamma_dist_def()", "Gamma()", + "The function will be removed completely in version 2.0.0." + ) + + if (missing(shape) && missing(scale) && !missing(mean) && !missing(sd)) { + if (!missing(mean_sd)) { + mean <- truncnorm::rtruncnorm(samples, a = 0, mean = mean, sd = mean_sd) + } + if (!missing(sd_sd)) { + sd <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) + } + scale <- sd^2 / mean + shape <- mean / scale + } else { + if (!missing(shape_sd)) { + shape <- truncnorm::rtruncnorm( + samples, + a = 0, mean = shape, sd = shape_sd + ) + } + if (!missing(scale_sd)) { + scale <- 1 / truncnorm::rtruncnorm( + samples, + a = 0, mean = scale, sd = scale_sd + ) + } + } + + rate <- 1 / scale + + dist <- data.table::data.table( + model = rep("gamma", samples), + params = purrr::list_transpose( + list( + shape = shape, + rate = rate + ) + ), + max_value = rep(max_value, samples) + ) + return(dist) +} + +#' Generate a Log Normal Distribution Definition Based on Parameter Estimates +#' +#' @description `r lifecycle::badge("deprecated")` +#' Generates a distribution definition when only parameter estimates +#' are available for log normal distributed parameters. See [rlnorm()] for +#' distribution information. +#' +#' @param mean Numeric, log mean parameter of the gamma distribution. +#' +#' @param mean_sd Numeric, standard deviation of the log mean parameter. +#' +#' @param sd Numeric, log sd parameter of the gamma distribution. +#' +#' @param sd_sd Numeric, standard deviation of the log sd parameter. +#' +#' @param samples Numeric, number of sample distributions to generate. +#' +#' @param to_log Logical, should parameters be logged before use. +#' +#' @return A `` defining the distribution as used by [dist_skel()] +#' @importFrom truncnorm rtruncnorm +#' @export +#' @inheritParams dist_skel +#' @examples +#' def <- lognorm_dist_def( +#' mean = 1.621, mean_sd = 0.0640, +#' sd = 0.418, sd_sd = 0.0691, +#' max_value = 20, samples = 10 +#' ) +#' print(def) +#' def$params[[1]] +#' +#' def <- lognorm_dist_def( +#' mean = 5, mean_sd = 1, +#' sd = 3, sd_sd = 1, +#' max_value = 20, samples = 10, +#' to_log = TRUE +#' ) +#' print(def) +#' def$params[[1]] +lognorm_dist_def <- function(mean, mean_sd, + sd, sd_sd, + max_value, samples, + to_log = FALSE) { + lifecycle::deprecate_warn( + "1.5.0", "lognorm_dist_def()", "LogNormal()", + "The function will be removed completely in version 2.0.0." + ) + + transform_mean <- function(mu, sig) { + mean_location <- log(mu^2 / sqrt(sig^2 + mu^2)) + mean_location + } + + transform_sd <- function(mu, sig) { + mean_shape <- sqrt(log(1 + (sig^2 / mu^2))) + mean_shape + } + + if (missing(mean_sd)) { + sampled_means <- mean + } else { + sampled_means <- truncnorm::rtruncnorm( + samples, + a = 0, mean = mean, sd = mean_sd + ) + } + + if (missing(sd_sd)) { + sampled_sds <- sd + } else { + sampled_sds <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) + } + means <- sampled_means + sds <- sampled_sds + + if (to_log) { + means <- mapply(transform_mean, sampled_means, sampled_sds) + sds <- mapply(transform_sd, sampled_means, sampled_sds) + } + + dist <- data.table::data.table( + model = rep("lognormal", samples), + params = purrr::list_transpose( + list( + meanlog = means, + sdlog = sds + ) + ), + max_value = rep(max_value, samples) + ) + return(dist) +} + +#' Tune an Inverse Gamma to Achieve the Target Truncation +#' +#' @description `r lifecycle::badge("deprecated")` +#' Allows an inverse gamma distribution to be. tuned so that less than 0.01 of +#' its probability mass function falls outside of the specified bounds. This is +#' required when using an inverse gamma prior, for example for a Gaussian +#' process. As no inverse gamma priors are currently in use and this function +#' has some stability issues it has been deprecated. +#' +#' @param lower Numeric, defaults to 2. Lower truncation bound. +#' +#' @param upper Numeric, defaults to 21. Upper truncation bound. +#' +#' @return A list of alpha and beta values that describe a inverse gamma +#' distribution that achieves the target truncation. +#' @export +#' +#' @keywords internal +#' +tune_inv_gamma <- function(lower = 2, upper = 21) { + lifecycle::deprecate_stop( + "1.3.6", "tune_inv_gamma()", + details = paste0( + "As no inverse gamma priors are currently in use and this function has ", + "some stability issues it has been deprecated. Please let the package ", + "authors know if deprecating this function has caused any issues. ", + "For the last active version of the function see the one contained ", + "in version 1.3.5 at ", + "https://github.com/epiforecasts/EpiNow2/blob/bad836ebd650ace73ad1ead887fd0eae98c52dd6/R/dist.R#L739" # nolint + ) + ) +} + +#' Specify a distribution. +#' +#' @description `r lifecycle::badge("deprecated")` +#' This function is deprecated as a user-facing function (while its +#' functionality is still used internally). Construct distributions using +#' the corresponding distribution function such as [Gamma()], [LogNormal()], +#' [Normal()] or [Fixed()] instead. +#' +#' @param distribution Character, defaults to "lognormal". The (discretised) +#' distribution to be used. Can be "lognormal", "gamma", "normal" or "fixed". +#' The corresponding parameters (defined in [natural_params()]) are passed +#' as `params_mean`, and their uncertainty as `params_sd`. +#' +#' @param params_mean Numeric. Central values of the parameters of the +#' distribution as defined in [natural_params(). +#' +#' @param params_sd Numeric. Standard deviations of the parameters of the +#' distribution as defined in [natural_params(). +#' +#' @param max Numeric, maximum value of the distribution. The distribution will +#' be truncated at this value. Default: `Inf`, i.e. no maximum. +#' +#' @param mean Deprecated; use `params_mean` instead. +#' +#' @param sd Deprecated; use `params_mean` instead. +#' +#' @param mean_sd Deprecated; use `params_sd` instead. +#' +#' @param sd_sd Deprecated; use `params_sd` instead. +#' +#' @param pmf Numeric, a vector of values that represent the (nonparametric) +#' probability mass function of the delay (starting with 0); defaults to an +#' empty vector corresponding to a parametric specification of the distribution +#' (using \code{params_mean}, and \code{params_sd}. +#' @param fixed Deprecated, use [fix_dist()] instead. +#' @return A list of distribution options. +#' @importFrom rlang warn arg_match +#' @export +dist_spec <- function(distribution = c( + "lognormal", "normal", "gamma", "fixed", "empty" + ), + params_mean = numeric(0), params_sd = numeric(0), + mean, sd = 0, mean_sd = 0, sd_sd = 0, + max = Inf, pmf = numeric(0), fixed = FALSE) { + + lifecycle::deprecate_warn( + "1.5.0", + "dist_spec()", + details = c( + paste0( + "Please use distribution functions such as `Gamma()` or `Lognormal()` ", + "instead." + ), + "The function will become internal only in version 2.0.0." + ) + ) + ## check for deprecated parameters + if (!missing(fixed)) { + lifecycle::deprecate_warn( + "1.5.0", + "dist_spec(fixed)", + "fix_dist()" + ) + params_sd <- NULL + } + ## check for deprecated parameters + if (!all(missing(mean), missing(sd), missing(mean_sd), missing(sd_sd)) && + (length(params_mean) > 0 || length(params_sd) > 0)) { + stop("Distributional parameters should not be given as `mean`, `sd`, etc. ", + "in addition to `params_mean` or `params_sd`") + } + distribution <- match.arg(distribution) + ## check if distribution is given as empty and warn about deprecation if so + if (distribution == "empty") { + deprecate_warn( + "1.5.0", + "dist_spec(distribution = 'must not be \"empty\"')", + details = "Please use `Fixed(0)` instead." + ) + } + + if (!all(missing(mean), missing(sd), missing(mean_sd), missing(sd_sd))) { + if (sd == 0 && mean_sd == 0 && sd_sd == 0) { + distribution <- "fixed" + } + ## deprecated arguments given + if (distribution == "lognormal") { + params_mean <- c(meanlog = mean, sdlog = sd) + params_sd <- c(meanlog = mean_sd, sdlog = sd_sd) + } else if (distribution == "gamma") { + temp_dist <- Gamma( + mean = Normal(mean, mean_sd), + sd = Normal(sd, sd_sd) + ) + params_mean <- vapply(temp_dist[[1]]$parameters, mean, numeric(1)) + params_sd <- vapply(temp_dist[[1]]$parameters, sd_dist, numeric(1)) + } else if (distribution == "normal") { + params_mean <- c(mean = mean, sd = sd) + params_sd <- c(mean = mean_sd, sd = sd_sd) + } else if (distribution == "fixed") { + params_mean <- mean + } + } + if (length(pmf) > 0) { + if (!all( + missing(mean), missing(sd), missing(mean_sd), missing(sd_sd), + missing(params_mean), missing(params_sd) + )) { + stop("Distributional parameters should not be given in addition to `pmf`") + } + distribution <- "nonparametric" + parameters <- list(pmf = pmf) + } else { + if (length(params_sd) == 0) { + params_sd <- rep(0, length(params_mean)) + } + parameters <- lapply(seq_along(params_mean), function(id) { + Normal(params_mean[id], params_sd[id]) + }) + names(parameters) <- natural_params(distribution) + parameters$max <- max + } + return(new_dist_spec(parameters, distribution)) +} diff --git a/R/dist.R b/R/dist.R deleted file mode 100644 index 2611d7028..000000000 --- a/R/dist.R +++ /dev/null @@ -1,1385 +0,0 @@ -#' Distribution Skeleton -#' -#' @description `r lifecycle::badge("questioning")` -#' This function acts as a skeleton for a truncated distribution defined by -#' model type, maximum value and model parameters. It is designed to be used -#' with the output from [get_dist()]. -#' -#' @param n Numeric vector, number of samples to take (or days for the -#' probability density). -#' -#' @param dist Logical, defaults to `FALSE`. Should the probability density be -#' returned rather than a number of samples. -#' -#' @param cum Logical, defaults to `TRUE`. If `dist = TRUE` should the returned -#' distribution be cumulative. -#' -#' @param model Character string, defining the model to be used. Supported -#' options are exponential ("exp"), gamma ("gamma"), and log normal -#' ("lognormal") -#' -#' @param discrete Logical, defaults to `FALSE`. Should the probability -#' distribution be discretised. In this case each entry of the probability -#' mass function corresponds to the 1-length interval ending at the entry, -#' i.e. the probability mass function is a vector where the first entry -#' corresponds to the integral over the (0,1] interval of the continuous -#' distribution, the second entry corresponds to the (1,2] interval etc. -#' -#' @param params A list of parameters values (by name) required for each model. -#' For the exponential model this is a rate parameter and for the gamma model -#' this is alpha and beta. -#' -#' @param max_value Numeric, the maximum value to allow. Defaults to 120. -#' Samples outside of this range are resampled. -#' -#' @return A vector of samples or a probability distribution. -#' @export -#' @examples -#' -#' ## Exponential model -#' # sample -#' dist_skel(10, model = "exp", params = list(rate = 1)) -#' -#' # cumulative prob density -#' dist_skel(1:10, model = "exp", dist = TRUE, params = list(rate = 1)) -#' -#' # probability density -#' dist_skel(1:10, -#' model = "exp", dist = TRUE, -#' cum = FALSE, params = list(rate = 1) -#' ) -#' -#' ## Gamma model -#' # sample -#' dist_skel(10, model = "gamma", params = list(shape = 1, scale = 2)) -#' -#' # cumulative prob density -#' dist_skel(0:10, -#' model = "gamma", dist = TRUE, -#' params = list(shape = 1, scale = 2) -#' ) -#' -#' # probability density -#' dist_skel(0:10, -#' model = "gamma", dist = TRUE, -#' cum = FALSE, params = list(shape = 2, scale = 2) -#' ) -#' -#' ## Log normal model -#' # sample -#' dist_skel(10, model = "lognormal", params = list(mean = log(5), sd = log(2))) -#' -#' # cumulative prob density -#' dist_skel(0:10, -#' model = "lognormal", dist = TRUE, -#' params = list(mean = log(5), sd = log(2)) -#' ) -#' -#' # probability density -#' dist_skel(0:10, -#' model = "lognormal", dist = TRUE, cum = FALSE, -#' params = list(mean = log(5), sd = log(2)) -#' ) -dist_skel <- function(n, dist = FALSE, cum = TRUE, model, - discrete = FALSE, params, max_value = 120) { - if (model == "exp") { - # define support functions for exponential dist - rdist <- function(n) { - rexp(n, params$rate) - } - pdist <- function(n) { - pexp(n, params$rate) / pexp(max_value, params$rate) - } - ddist <- function(n) { - (pexp(n + 1, params$rate) - - pexp(n, params$rate)) / - pexp(max_value + 1, params$rate) - } - } else if (model == "gamma") { - rdist <- function(n) { - rgamma(n, params$shape, params$scale) - } - pdist <- function(n) { - pgamma(n, params$shape, params$scale) / - pgamma(max_value + 1, params$shape, params$scale) - } - ddist <- function(n) { - (pgamma(n + 1, params$shape, params$scale) - - pgamma(n, params$shape, params$scale)) / - pgamma(max_value + 1, params$shape, params$scale) - } - } else if (model == "lognormal") { - rdist <- function(n) { - rlnorm(n, params$mean, params$sd) - } - pdist <- function(n) { - plnorm(n, params$mean, params$sd) / - plnorm(max_value + 1, params$mean, params$sd) - } - ddist <- function(n) { - (plnorm(n + 1, params$mean, params$sd) - - plnorm(n, params$mean, params$sd)) / - plnorm(max_value + 1, params$mean, params$sd) - } - } - - if (discrete) { - cmf <- c(0, pdist(seq_len(max_value + 1))) - pmf <- diff(cmf) - rdist <- function(n) { - sample(x = seq_len(max_value + 1) - 1, size = n, prob = pmf) - } - pdist <- function(n) { - cmf[n + 1] - } - ddist <- function(n) { - pmf[n + 1] - } - } - - # define internal sampling function - inner_skel <- function(n, dist = FALSE, cum = TRUE, max_value = NULL) { - if (dist) { - if (cum) { - ret <- pdist(n) - } else { - ret <- ddist(n) - } - ret[ret > 1] <- NA_real_ - return(ret) - } else { - rdist(n) - } - } - - # define truncation wrapper - truncated_skel <- function(n, dist, cum, max_value) { - n <- inner_skel(n, dist, cum, max_value) - if (!dist) { - while (any(!is.na(n) & n >= max_value)) { - n <- ifelse(n >= max_value, inner_skel(n), n) - } - - n <- as.integer(n) - } - return(n) - } - - # call function - sample <- truncated_skel(n, dist = dist, cum = cum, max_value = max_value) - return(sample) -} - - -#' Fit an Integer Adjusted Exponential, Gamma or Lognormal distributions -#' -#' @description `r lifecycle::badge("stable")` -#' Fits an integer adjusted exponential, gamma or lognormal distribution using -#' stan. -#' @param values Numeric vector of values -#' -#' @param samples Numeric, number of samples to take. Must be >= 1000. -#' Defaults to 1000. -#' -#' @param dist Character string, which distribution to fit. Defaults to -#' exponential (`"exp"`) but gamma (`"gamma"`) and lognormal (`"lognormal"`) are -#' also supported. -#' -#' @param cores Numeric, defaults to 1. Number of CPU cores to use (no effect -#' if greater than the number of chains). -#' -#' @param chains Numeric, defaults to 2. Number of MCMC chains to use. More is -#' better with the minimum being two. -#' -#' @param verbose Logical, defaults to FALSE. Should verbose progress messages -#' be printed. -#' -#' @return A stan fit of an interval censored distribution -#' @export -#' @inheritParams stan_opts -#' @examples -#' \donttest{ -#' # integer adjusted exponential model -#' dist_fit(rexp(1:100, 2), -#' samples = 1000, dist = "exp", -#' cores = ifelse(interactive(), 4, 1), verbose = TRUE -#' ) -#' -#' -#' # integer adjusted gamma model -#' dist_fit(rgamma(1:100, 5, 5), -#' samples = 1000, dist = "gamma", -#' cores = ifelse(interactive(), 4, 1), verbose = TRUE -#' ) -#' -#' # integer adjusted lognormal model -#' dist_fit(rlnorm(1:100, log(5), 0.2), -#' samples = 1000, dist = "lognormal", -#' cores = ifelse(interactive(), 4, 1), verbose = TRUE -#' ) -#' } -dist_fit <- function(values = NULL, samples = 1000, cores = 1, - chains = 2, dist = "exp", verbose = FALSE, - backend = "rstan") { - if (samples < 1000) { - samples <- 1000 - warning(sprintf("%s %s", "`samples` must be at least 1000.", - "Now setting it to 1000 internally." - ) - ) - } - # model parameters - lows <- values - 1 - lows <- ifelse(lows <= 0, 1e-6, lows) - ups <- values + 1 - - data <- list( - N = length(values), - low = lows, - up = ups, - lam_mean = numeric(0), - prior_mean = numeric(0), - prior_sd = numeric(0), - par_sigma = numeric(0) - ) - - model <- stan_model(backend, "dist_fit") - - if (dist == "exp") { - data$dist <- 0 - data$lam_mean <- array(mean(values)) - } else if (dist == "lognormal") { - data$dist <- 1 - data$prior_mean <- array(log(mean(values))) - data$prior_sd <- array(log(sd(values))) - } else if (dist == "gamma") { - data$dist <- 2 - data$prior_mean <- array(mean(values)) - data$prior_sd <- array(sd(values)) - data$par_sigma <- array(1.0) - } - - # set adapt delta based on the sample size - if (length(values) <= 30) { - adapt_delta <- 0.999 - } else { - adapt_delta <- 0.9 - } - - # fit model - args <- create_stan_args( - stan = stan_opts( - model, - samples = samples, - warmup = 1000, - control = list(adapt_delta = adapt_delta), - chains = chains, - cores = cores, - backend = backend - ), - data = data, verbose = verbose, model = "dist_fit" - ) - - fit <- fit_model(args, id = "dist_fit") - - return(fit) -} - - -#' Generate a Gamma Distribution Definition Based on Parameter Estimates -#' -#' @description `r lifecycle::badge("soft-deprecated")` -#' Generates a distribution definition when only parameter estimates -#' are available for gamma distributed parameters. See [rgamma()] for -#' distribution information. -#' -#' @param shape Numeric, shape parameter of the gamma distribution. -#' -#' @param shape_sd Numeric, standard deviation of the shape parameter. -#' -#' @param scale Numeric, scale parameter of the gamma distribution. -#' -#' @param scale_sd Numeric, standard deviation of the scale parameter. -#' -#' @param samples Numeric, number of sample distributions to generate. -#' -#' @importFrom truncnorm rtruncnorm -#' @return A `` defining the distribution as used by [dist_skel()] -#' @export -#' @inheritParams dist_skel -#' @inheritParams lognorm_dist_def -#' @examples -#' # using estimated shape and scale -#' def <- gamma_dist_def( -#' shape = 5.807, shape_sd = 0.2, -#' scale = 0.9, scale_sd = 0.05, -#' max_value = 20, samples = 10 -#' ) -#' print(def) -#' def$params[[1]] -#' -#' # using mean and sd -#' def <- gamma_dist_def( -#' mean = 3, mean_sd = 0.5, -#' sd = 3, sd_sd = 0.1, -#' max_value = 20, samples = 10 -#' ) -#' print(def) -#' def$params[[1]] -gamma_dist_def <- function(shape, shape_sd, - scale, scale_sd, - mean, mean_sd, - sd, sd_sd, - max_value, samples) { - if (missing(shape) && missing(scale) && !missing(mean) && !missing(sd)) { - if (!missing(mean_sd)) { - mean <- truncnorm::rtruncnorm(samples, a = 0, mean = mean, sd = mean_sd) - } - if (!missing(sd_sd)) { - sd <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) - } - scale <- sd^2 / mean - shape <- mean / scale - scale <- 1 / scale - } else { - if (!missing(shape_sd)) { - shape <- truncnorm::rtruncnorm( - samples, - a = 0, mean = shape, sd = shape_sd - ) - } - if (!missing(scale_sd)) { - scale <- 1 / truncnorm::rtruncnorm( - samples, - a = 0, mean = scale, sd = scale_sd - ) - } - } - - dist <- data.table::data.table( - model = rep("gamma", samples), - params = purrr::list_transpose( - list( - shape = shape, - scale = scale - ), - simplify = FALSE - ), - max_value = rep(max_value, samples) - ) - return(dist) -} - -#' Generate a Log Normal Distribution Definition Based on Parameter Estimates -#' -#' @description `r lifecycle::badge("soft-deprecated")` -#' Generates a distribution definition when only parameter estimates -#' are available for log normal distributed parameters. See [rlnorm()] for -#' distribution information. -#' -#' @param mean Numeric, log mean parameter of the gamma distribution. -#' -#' @param mean_sd Numeric, standard deviation of the log mean parameter. -#' -#' @param sd Numeric, log sd parameter of the gamma distribution. -#' -#' @param sd_sd Numeric, standard deviation of the log sd parameter. -#' -#' @param samples Numeric, number of sample distributions to generate. -#' -#' @param to_log Logical, should parameters be logged before use. -#' -#' @return A `` defining the distribution as used by [dist_skel()] -#' @importFrom truncnorm rtruncnorm -#' @export -#' @inheritParams dist_skel -#' @examples -#' def <- lognorm_dist_def( -#' mean = 1.621, mean_sd = 0.0640, -#' sd = 0.418, sd_sd = 0.0691, -#' max_value = 20, samples = 10 -#' ) -#' print(def) -#' def$params[[1]] -#' -#' def <- lognorm_dist_def( -#' mean = 5, mean_sd = 1, -#' sd = 3, sd_sd = 1, -#' max_value = 20, samples = 10, -#' to_log = TRUE -#' ) -#' print(def) -#' def$params[[1]] -lognorm_dist_def <- function(mean, mean_sd, - sd, sd_sd, - max_value, samples, - to_log = FALSE) { - transform_mean <- function(mu, sig) { - mean_location <- log(mu^2 / sqrt(sig^2 + mu^2)) - mean_location - } - - transform_sd <- function(mu, sig) { - mean_shape <- sqrt(log(1 + (sig^2 / mu^2))) - mean_shape - } - - if (missing(mean_sd)) { - sampled_means <- mean - } else { - sampled_means <- truncnorm::rtruncnorm( - samples, - a = 0, mean = mean, sd = mean_sd - ) - } - - if (missing(sd_sd)) { - sampled_sds <- sd - } else { - sampled_sds <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) - } - means <- sampled_means - sds <- sampled_sds - - if (to_log) { - means <- mapply(transform_mean, sampled_means, sampled_sds) - sds <- mapply(transform_sd, sampled_means, sampled_sds) - } - - dist <- data.table::data.table( - model = rep("lognormal", samples), - params = purrr::list_transpose( - list( - mean = means, - sd = sds - ), - simplify = FALSE - ), - max_value = rep(max_value, samples) - ) - return(dist) -} - -#' Fit a Subsampled Bootstrap to Integer Values and Summarise Distribution -#' Parameters -#' -#' @description `r lifecycle::badge("stable")` -#' Fits an integer adjusted distribution to a subsampled bootstrap of data and -#' then integrates the posterior samples into a single set of summary -#' statistics. Can be used to generate a robust reporting delay that accounts -#' for the fact the underlying delay likely varies over time or that the size -#' of the available reporting delay sample may not be representative of the -#' current case load. -#' -#' @param values Integer vector of values. -#' -#' @param dist Character string, which distribution to fit. Defaults to -#' lognormal (`"lognormal"`) but gamma (`"gamma"`) is also supported. -#' -#' @param verbose Logical, defaults to `FALSE`. Should progress messages be -#' printed. -#' -#' @param samples Numeric, number of samples to take overall from the -#' bootstrapped posteriors. -#' -#' @param bootstraps Numeric, defaults to 1. The number of bootstrap samples -#' (with replacement) of the delay distribution to take. -#' -#' @param bootstrap_samples Numeric, defaults to 100. The number of samples to -#' take in each bootstrap. When the sample size of the supplied delay -#' distribution is less than 100 this is used instead. -#' -#' @param max_value Numeric, defaults to the maximum value in the observed -#' data. Maximum delay to allow (added to output but does impact fitting). -#' -#' @return A `` object summarising the bootstrapped distribution -#' @importFrom purrr list_transpose -#' @importFrom future.apply future_lapply -#' @importFrom rstan extract -#' @importFrom data.table data.table rbindlist -#' @export -#' @examples -#' \donttest{ -#' # lognormal -#' delays <- rlnorm(500, log(5), 1) -#' out <- bootstrapped_dist_fit(delays, -#' samples = 1000, bootstraps = 10, -#' dist = "lognormal" -#' ) -#' out -#' } -bootstrapped_dist_fit <- function(values, dist = "lognormal", - samples = 2000, bootstraps = 10, - bootstrap_samples = 250, max_value, - verbose = FALSE) { - if (!dist %in% c("gamma", "lognormal")) { - stop("Only lognormal and gamma distributions are supported") - } - - if (samples < bootstraps) { - samples <- bootstraps - } - ## Make values integer if not - values <- as.integer(values) - ## Remove NA values - values <- values[!is.na(values)] - ## Filter out negative values - values <- values[values >= 0] - - get_single_dist <- function(values, samples = 1) { - set_dt_single_thread() - - fit <- EpiNow2::dist_fit(values, samples = samples, dist = dist) - - - out <- list() - if (dist == "lognormal") { - out$mean_samples <- sample(extract(fit)$mu, samples) - out$sd_samples <- sample(extract(fit)$sigma, samples) - } else if (dist == "gamma") { - alpha_samples <- sample(extract(fit)$alpha, samples) - beta_samples <- sample(extract(fit)$beta, samples) - out$mean_samples <- alpha_samples / beta_samples - out$sd_samples <- sqrt(alpha_samples) / beta_samples - } - return(out) - } - - - if (bootstraps == 1) { - dist_samples <- get_single_dist(values, samples = samples) - } else { - ## Fit each sub sample - dist_samples <- future.apply::future_lapply(1:bootstraps, - function(boot) { - get_single_dist( - sample(values, - min(length(values), bootstrap_samples), - replace = TRUE - ), - samples = ceiling(samples / bootstraps) - ) - }, - future.scheduling = Inf, - future.globals = c( - "values", "bootstraps", "samples", - "bootstrap_samples", "get_single_dist" - ), - future.packages = "data.table", future.seed = TRUE - ) - - - dist_samples <- purrr::list_transpose(dist_samples, simplify = FALSE) - dist_samples <- purrr::map(dist_samples, unlist) - } - - out <- list() - out$mean <- mean(dist_samples$mean_samples) - out$mean_sd <- sd(dist_samples$mean_samples) - out$sd <- mean(dist_samples$sd_sample) - out$sd_sd <- sd(dist_samples$sd_samples) - if (!missing(max_value)) { - out$max <- max_value - } else { - out$max <- max(values) - } - return(do.call(dist_spec, out)) -} - -#' Estimate a Delay Distribution -#' -#' @description `r lifecycle::badge("maturing")` -#' Estimate a log normal delay distribution from a vector of integer delays. -#' Currently this function is a simple wrapper for [bootstrapped_dist_fit()]. -#' -#' @param delays Integer vector of delays -#' -#' @param ... Arguments to pass to internal methods. -#' -#' @return A `` summarising the bootstrapped distribution -#' @export -#' @seealso [bootstrapped_dist_fit()] -#' @examples -#' \donttest{ -#' delays <- rlnorm(500, log(5), 1) -#' estimate_delay(delays, samples = 1000, bootstraps = 10) -#' } -estimate_delay <- function(delays, ...) { - fit <- bootstrapped_dist_fit( - values = delays, - dist = "lognormal", ... - ) - return(fit) -} - -#' Approximate Sampling a Distribution using Counts -#' -#' @description `r lifecycle::badge("soft-deprecated")` -#' Convolves cases by a PMF function. This function will soon be removed or -#' replaced with a more robust stan implementation. -#' -#' @param cases A `` of cases (in date order) with the following -#' variables: `date` and `cases`. -#' -#' @param max_value Numeric, maximum value to allow. Defaults to 120 days -#' -#' @param direction Character string, defato "backwards". Direction in which to -#' map cases. Supports either "backwards" or "forwards". -#' -#' @param dist_fn Function that takes two arguments with the first being -#' numeric and the second being logical (and defined as `dist`). Should return -#' the probability density or a sample from the defined distribution. See -#' the examples for more. -#' -#' @param earliest_allowed_mapped A character string representing a date -#' ("2020-01-01"). Indicates the earliest allowed mapped value. -#' -#' @param type Character string indicating the method to use to transform -#' counts. Supports either "sample" which approximates sampling or "median" -#' would shift by the median of the distribution. -#' -#' @param truncate_future Logical, should cases be truncated if they occur -#' after the first date reported in the data. Defaults to `TRUE`. -#' -#' @return A `` of cases by date of onset -#' @export -#' @importFrom data.table data.table setorder -#' @importFrom lubridate days -#' @examples -#' \donttest{ -#' cases <- example_confirmed -#' cases <- cases[, cases := as.integer(confirm)] -#' print(cases) -#' -#' # total cases -#' sum(cases$cases) -#' -#' delay_fn <- function(n, dist, cum) { -#' if (dist) { -#' pgamma(n + 0.9999, 2, 1) - pgamma(n - 1e-5, 2, 1) -#' } else { -#' as.integer(rgamma(n, 2, 1)) -#' } -#' } -#' -#' onsets <- sample_approx_dist( -#' cases = cases, -#' dist_fn = delay_fn -#' ) -#' -#' # estimated onset distribution -#' print(onsets) -#' -#' # check that sum is equal to reported cases -#' total_onsets <- median( -#' purrr::map_dbl( -#' 1:10, -#' ~ sum(sample_approx_dist( -#' cases = cases, -#' dist_fn = delay_fn -#' )$cases) -#' ) -#' ) -#' total_onsets -#' -#' -#' # map from onset cases to reported -#' reports <- sample_approx_dist( -#' cases = cases, -#' dist_fn = delay_fn, -#' direction = "forwards" -#' ) -#' -#' -#' # map from onset cases to reported using a mean shift -#' reports <- sample_approx_dist( -#' cases = cases, -#' dist_fn = delay_fn, -#' direction = "forwards", -#' type = "median" -#' ) -#' } -sample_approx_dist <- function(cases = NULL, - dist_fn = NULL, - max_value = 120, - earliest_allowed_mapped = NULL, - direction = "backwards", - type = "sample", - truncate_future = TRUE) { - if (type == "sample") { - if (direction == "backwards") { - direction_fn <- rev - } else if (direction == "forwards") { - direction_fn <- function(x) { - x - } - } - # reverse cases so starts with current first - reversed_cases <- direction_fn(cases$cases) - reversed_cases[is.na(reversed_cases)] <- 0 - # draw from the density fn of the dist - draw <- dist_fn(0:max_value, dist = TRUE, cum = FALSE) - - # approximate cases - mapped_cases <- do.call(cbind, purrr::map( - seq_along(reversed_cases), - ~ c( - rep(0, . - 1), - stats::rbinom( - length(draw), - rep(reversed_cases[.], length(draw)), - draw - ), - rep(0, length(reversed_cases) - .) - ) - )) - - - # set dates order based on direction mapping - if (direction == "backwards") { - dates <- seq(min(cases$date) - lubridate::days(length(draw) - 1), - max(cases$date), - by = "days" - ) - } else if (direction == "forwards") { - dates <- seq(min(cases$date), - max(cases$date) + lubridate::days(length(draw) - 1), - by = "days" - ) - } - - # summarises movements and sample for placement of non-integer cases - case_sum <- direction_fn(rowSums(mapped_cases)) - floor_case_sum <- floor(case_sum) - sample_cases <- floor_case_sum + - as.numeric((runif(seq_along(case_sum)) < (case_sum - floor_case_sum))) - - # summarise imputed onsets and build output data.table - mapped_cases <- data.table::data.table( - date = dates, - cases = sample_cases - ) - - # filter out all zero cases until first recorded case - mapped_cases <- data.table::setorder(mapped_cases, date) - mapped_cases <- mapped_cases[ - , - cum_cases := cumsum(cases) - ][cum_cases != 0][, cum_cases := NULL] - } else if (type == "median") { - shift <- as.integer( - median(as.integer(dist_fn(1000, dist = FALSE)), na.rm = TRUE) - ) - - if (direction == "backwards") { - mapped_cases <- data.table::copy(cases)[ - , - date := date - lubridate::days(shift) - ] - } else if (direction == "forwards") { - mapped_cases <- data.table::copy(cases)[ - , - date := date + lubridate::days(shift) - ] - } - } - - if (!is.null(earliest_allowed_mapped)) { - mapped_cases <- mapped_cases[date >= as.Date(earliest_allowed_mapped)] - } - - # filter out future cases - if (direction == "forwards" && truncate_future) { - max_date <- max(cases$date) - mapped_cases <- mapped_cases[date <= max_date] - } - return(mapped_cases) -} - -#' Specify a distribution. -#' -#' @description `r lifecycle::badge("stable")` -#' Defines the parameters of a supported distribution for use in onward -#' modelling. Multiple distribution families are supported - see the -#' documentation for `family` for details. Alternatively, a nonparametric -#' distribution can be specified using the \code{pmf} argument. -#' This function provides distribution -#' functionality in [delay_opts()], [generation_time_opts()], and -#' [trunc_opts()]. -#' -#' @param mean Numeric. If the only non-zero summary parameter -#' then this is the fixed interval of the distribution. If the `sd` is -#' non-zero then this is the mean of the distribution given by \code{dist}. -#' If this is not given a vector of empty vectors is returned. -#' -#' @param sd Numeric, defaults to 0. Sets the standard deviation of the -#' distribution. -#' -#' @param mean_sd Numeric, defaults to 0. Sets the standard deviation of the -#' uncertainty around the mean of the distribution assuming a normal -#' prior. -#' -#' @param sd_sd Numeric, defaults to 0. Sets the standard deviation of the -#' uncertainty around the sd of the distribution assuming a normal prior. -#' -#' @param distribution Character, defaults to "lognormal". The (discretised -#' distribution to be used. If sd == 0 then the distribution is fixed and a -#' delta function is used. If sd > 0 then the distribution is discretised and -#' truncated. -#' -#' The following distributions are currently supported: -#' -#' - "lognormal" - a lognormal distribution. For this distribution `mean` -#' is the mean of the natural logarithm of the delay (on the log scale) and -#' `sd` is the standard deviation of the natural logarithm of the delay. -#' -#' - "gamma" - a gamma distribution. For this distribution `mean` is the -#' mean of the delay and `sd` is the standard deviation of the delay. During -#' model fitting these are then transformed to the shape and scale of the gamma -#' distribution. -#' -#' When `distribution` is the default lognormal distribution the other function -#' arguments have the following definition: -#' - `mean` is the mean of the natural logarithm of the delay (on the -#' log scale). -#' - `sd` is the standard deviation of the natural logarithm of the delay. -#' -#' @param max Numeric, maximum value of the distribution. The distribution will -#' be truncated at this value. -#' -#' @param pmf Numeric, a vector of values that represent the (nonparametric) -#' probability mass function of the delay (starting with 0); defaults to an -#' empty vector corresponding to a parametric specification of the distribution -#' (using \code{mean}, \code{sd} and corresponding uncertainties) -#' -#' @param fixed Deprecated, use [fix_dist()] instead -#' as coming from fixed (vs uncertain) distributions. Overrides any values -#' assigned to \code{mean_sd} and \code{sd_sd} by setting them to zero. -#' reduces compute requirement but may produce spuriously precise estimates. -#' @return A list of distribution options. -#' -#' @importFrom rlang warn arg_match -#' @export -#' @examples -#' # A fixed lognormal distribution with mean 5 and sd 1. -#' dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") -#' -#' # An uncertain gamma distribution with mean 3 and sd 2 -#' dist_spec( -#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, -#' distribution = "gamma" -#' ) -dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0, - distribution = c("lognormal", "gamma"), max, - pmf = numeric(0), fixed = FALSE) { - ## deprecate previous behaviour - warn( - message = paste( - "The meaning of the 'max' argument has changed compared to", - "previous versions. It now indicates the maximum of a distribution", - "rather than the length of the probability mass function (including 0)", - "that it represented previously. To replicate previous behaviour reduce", - "max by 1." - ), - .frequency = "regularly", - .frequency_id = "dist_spec_max" - ) - ## check for deprecated parameters - if (!missing(fixed)) { - deprecate_warn( - "2.0.0", - "dist_spec(fixed)", - "fix_dist()", - "The argument will be removed completely in version 2.1.0." - ) - mean_sd <- 0 - sd_sd <- 0 - } - ## check if parametric or nonparametric - if (length(pmf) > 0 && - !all( - missing(mean), missing(sd), missing(mean_sd), - missing(sd_sd), missing(distribution), missing(max) - )) { - stop("Distributional parameters or a pmf can be specified, but not both.") - } - - fixed <- mean_sd == 0 && sd_sd == 0 - - ## check parametric parameters make sense - if (!missing(mean)) { - if (sd == 0 && sd_sd == 0) { ## integer fixed - if (mean %% 1 != 0) { - stop( - "When a distribution is set to a constant ", - "(sd == 0 and sd_sd == 0) then the mean parameter ", - "must be an integer." - ) - } - max <- mean - if (mean_sd > 0) { - stop( - "When a distribution has sd == 0 and ", - "sd_sd == 0 then mean_sd must be 0, too." - ) - } - } else { - if (missing(max)) { - stop("Maximum of parametric distributions must be specified.") - } - } - } else { - if (!all( - missing(sd), missing(mean_sd), - missing(sd_sd), missing(distribution), missing(max) - )) { - stop( - "If any distributional parameters are given then so must the mean." - ) - } - } - - distribution <- arg_match(distribution) - if (fixed) { - ret <- list( - mean_mean = numeric(0), - mean_sd = numeric(0), - sd_mean = numeric(0), - sd_sd = numeric(0), - dist = character(0), - max = integer(0) - ) - if (length(pmf) == 0) { - if (missing(mean)) { ## empty - ret <- c(ret, list( - n = 0, - n_p = 0, - n_np = 0, - np_pmf = numeric(0), - fixed = integer(0) - )) - } else { ## parametric fixed - if (sd == 0) { ## delta - pmf <- c(rep(0, mean), 1) - } else { - if (distribution == "lognormal") { - params <- lognorm_dist_def( - mean = mean, mean_sd = mean_sd, - sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 - ) - } else if (distribution == "gamma") { - params <- gamma_dist_def( - mean = mean, mean_sd = mean_sd, - sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 - ) - } - pmf <- dist_skel( - n = seq_len(max + 1) - 1, dist = TRUE, cum = FALSE, - model = distribution, params = params$params[[1]], max_value = max, - discrete = TRUE - ) - } - } - } else { ## nonparametric fixed - if (!missing(max) && (max + 1) < length(pmf)) { - pmf <- pmf[1:(max + 1)] - } - pmf <- pmf / sum(pmf) - } - if (length(pmf) > 0) { - ret <- c(ret, list( - n = 1, - n_p = 0, - n_np = 1, - np_pmf = pmf, - fixed = 1L - )) - } - } else { - ret <- list( - mean_mean = mean, - mean_sd = mean_sd, - sd_mean = sd, - sd_sd = sd_sd, - dist = distribution, - max = max, - n = 1, - n_p = 1, - n_np = 0, - np_pmf = numeric(0), - fixed = 0L - ) - } - ret <- purrr::map(ret, array) - sum_args <- grep("(^n$|^n_$)", names(ret)) - ret$np_pmf_length <- length(ret$np_pmf) - ret[sum_args] <- purrr::map(ret[sum_args], sum) - attr(ret, "class") <- c("list", "dist_spec") - return(ret) -} - -#' Creates a delay distribution as the sum of two other delay distributions -#' -#' This is done via convolution with [stats::convolve()]. Nonparametric delays -#' that can be combined are processed together, and their cumulative -#' distribution function is truncated at a specified tolerance level, ensuring -#' numeric stability. -#' -#' @param e1 The first delay distribution (from a call to [dist_spec()]) to -#' combine. -#' -#' @param e2 The second delay distribution (from a call to [dist_spec()]) to -#' combine. -#' -#' @param tolerance A numeric value that sets the cumulative probability -#' to retain when truncating the cumulative distribution function of the -#' combined nonparametric delays. The default value is 0.001 with this retaining -#' 0.999 of the cumulative probability. Note that using a larger tolerance may -#' result in a smaller number of points in the combined nonparametric delay but -#' may also impact the accuracy of the combined delay (i.e., change the mean -#' and standard deviation). -#' -#' @return A delay distribution representing the sum of the two delays -#' (with class [dist_spec()]) -#' -#' @importFrom stats convolve -dist_spec_plus <- function(e1, e2, tolerance = 0.001) { - ## process delay distributions - delays <- c(e1, e2) - ## combine any nonparametric delays that can be combined - if (sum(delays$fixed) > 1) { - new_pmf <- 1L - group_starts <- c(1L, cumsum(delays$np_pmf_length) + 1L) - for (i in seq_len(length(group_starts) - 1L)) { - new_pmf <- stats::convolve( - new_pmf, - rev(delays$np_pmf[seq(group_starts[i], group_starts[i + 1L] - 1L)]), - type = "open" - ) - } - if (tolerance > 0 && length(new_pmf) > 1) { - cdf <- cumsum(new_pmf) - new_pmf <- new_pmf[c(TRUE, (1 - cdf[-length(cdf)]) >= tolerance)] - new_pmf <- new_pmf / sum(new_pmf) - } - delays$np_pmf <- new_pmf - delays$fixed <- c(1, rep(0, delays$n_p)) - delays$n_np <- 1 - delays$n <- delays$n_p + 1 - } - delays$np_pmf_length <- length(delays$np_pmf) - return(delays) -} - -#' Creates a delay distribution as the sum of two other delay distributions -#' -#' This is done via convolution with [stats::convolve()]. Nonparametric delays -#' that can be combined are processed together, and their cumulative -#' distribution function is truncated at a specified tolerance level, ensuring -#' numeric stability. -#' -#' @return A delay distribution representing the sum of the two delays -#' (with class [dist_spec()]) -#' @inheritParams dist_spec_plus -#' @method + dist_spec -#' @export -#' @examples -#' # A fixed lognormal distribution with mean 5 and sd 1. -#' lognormal <- dist_spec( -#' mean = 1.6, sd = 1, max = 20, distribution = "lognormal" -#' ) -#' lognormal + lognormal -#' -#' # An uncertain gamma distribution with mean 3 and sd 2 -#' gamma <- dist_spec( -#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, -#' distribution = "gamma" -#' ) -#' lognormal + gamma -#' -#' # Using tolerance parameter -#' EpiNow2:::dist_spec_plus(lognormal, lognormal, tolerance = 0.5) -`+.dist_spec` <- function(e1, e2) { - dist_spec_plus(e1, e2, tolerance = 0.001) -} - -#' Combines multiple delay distributions for further processing -#' -#' This combines the parameters so that they can be fed as multiple delay -#' distributions to [epinow()] or [estimate_infections()]. -#' -#' @param ... The delay distributions (from calls to [dist_spec()]) to combine -#' @return Combined delay distributions (with class ``) -#' @method c dist_spec -#' @importFrom purrr list_transpose map -c.dist_spec <- function(...) { - ## process delay distributions - delays <- list(...) - if (!(all(vapply(delays, is, FALSE, "dist_spec")))) { - stop( - "Delay distribution can only be concatenated with other delay ", - "distributions." - ) - } - ## transpose delays - delays <- purrr::list_transpose(delays, simplify = FALSE) - ## convert back to arrays - delays <- purrr::map(delays, function(x) array(unlist(x))) - sum_args <- grep("^n($|_)", names(delays)) - delays[sum_args] <- purrr::map(delays[sum_args], sum) - attr(delays, "class") <- c("list", "dist_spec") - return(delays) -} - -##' Returns the mean of one or more delay distribution -##' -##' This works out the mean of all the (parametric / nonparametric) delay -##' distributions combined in the passed [dist_spec()]. -##' -##' @param x The `` to use -##' @param ... Not used -##' @return A vector of means. -##' @method mean dist_spec -##' @importFrom utils head -##' @export -#' @examples -#' # A fixed lognormal distribution with mean 5 and sd 1. -#' lognormal <- dist_spec( -#' mean = 5, sd = 1, max = 20, distribution = "lognormal" -#' ) -#' mean(lognormal) -#' -#' # An uncertain gamma distribution with mean 3 and sd 2 -#' gamma <- dist_spec( -#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, -#' distribution = "gamma" -#' ) -#' mean(gamma) -#' -#' # The mean of the sum of two distributions -#' mean(lognormal + gamma) -mean.dist_spec <- function(x, ...) { - ret <- rep(0, x$n) - ## nonparametric - if (x$n_np > 0) { - init_id <- c(1, head(cumsum(x$np_pmf_length) + 1, n = -1)) - ret[x$fixed == 1L] <- vapply(seq_along(init_id), function(id) { - pmf <- x$np_pmf[seq(init_id[id], cumsum(x$np_pmf_length)[id])] - return(sum((seq_len(x$np_pmf_length[id]) - 1) * pmf)) - }, 0) - } - ## parametric - if (x$n_p > 0) { - ret[x$fixed == 0L] <- vapply(seq_along(which(x$fixed == 0L)), function(id) { - if (x$dist[id] == "lognormal") { - return(exp(x$mean_mean[id] + x$sd_mean[id] / 2)) - } else if (x$dist[id] == "gamma") { - return(x$mean_mean[id]) - } else { - stop("Unknown distribution: ", x$dist[id]) - } - }, 0) - } - return(ret) -} - -#' Prints the parameters of one or more delay distributions -#' -#' This displays the parameters of the uncertain and probability mass -#' functions of fixed delay distributions combined in the passed [dist_spec()]. -#' @param x The `` to use -#' @param ... Not used -#' @return invisible -#' @method print dist_spec -#' @export -#' @examples -#' #' # A fixed lognormal distribution with mean 5 and sd 1. -#' lognormal <- dist_spec( -#' mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal" -#' ) -#' print(lognormal) -#' -#' # An uncertain gamma distribution with mean 3 and sd 2 -#' gamma <- dist_spec( -#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, -#' distribution = "gamma" -#' ) -#' print(gamma) -print.dist_spec <- function(x, ...) { - cat("\n") - if (x$n == 0) { - cat("Empty `dist_spec` distribution.\n") - return(invisible(NULL)) - } else if (x$n > 1) { - cat("Combination of delay distributions:\n") - } - fixed_id <- 1 - fixed_pos <- 1 - variable_id <- 1 - for (i in 1:x$n) { - cat(" ") - if (!is.null(x$names) && nchar(x$names[i]) > 0) { - cat(x$names[i], ": ", sep = "") - } - if (x$fixed[i] == 0) { - dist <- x$dist[variable_id] - cat( - "Uncertain ", dist, " distribution with (untruncated) ", - ifelse(dist == "lognormal", "log", ""), - "mean ", signif(x$mean_mean[variable_id], digits = 2), - " (SD ", signif(x$mean_sd[variable_id], digits = 2), ") and ", - ifelse(dist == "lognormal", "log", ""), - "SD ", signif(x$sd_mean[variable_id], 2), - " (SD ", signif(x$sd_sd[variable_id], 2), ")\n", - sep = "" - ) - variable_id <- variable_id + 1 - } else { - cat( - "Fixed distribution with PMF [", - paste(signif( - x$np_pmf[seq(fixed_pos, fixed_pos + x$np_pmf_length[fixed_id] - 1)], - digits = 2 - ), collapse = " "), - "]\n", - sep = "" - ) - fixed_id <- fixed_id + 1 - fixed_pos <- fixed_pos + x$np_pmf_length[i] - } - } - cat("\n") -} - -#' Plot PMF and CDF for a dist_spec object -#' -#' This function takes a `` object and plots its probability mass -#' function (PMF) and cumulative distribution function (CDF) using `{ggplot2}`. -#' Note that currently uncertainty in distributions is not plot. -#' -#' @param x A `` object -#' @param ... Additional arguments to pass to `{ggplot}`. -#' @importFrom ggplot2 aes geom_col geom_step facet_wrap vars theme_bw -#' @export -#' @examples -#' #' # A fixed lognormal distribution with mean 5 and sd 1. -#' lognormal <- dist_spec( -#' mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal" -#' ) -#' plot(lognormal) -#' -#' # An uncertain gamma distribution with mean 3 and sd 2 -#' gamma <- dist_spec( -#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, -#' distribution = "gamma" -#' ) -#' plot(gamma) -#' -#' # Multiple distributions -#' plot(lognormal + gamma + lognormal) -#' -#' # A combination of the two fixed distributions -#' plot(lognormal + lognormal) -plot.dist_spec <- function(x, ...) { - distribution <- cdf <- NULL - # Get the PMF and CDF data - pmf_data <- data.frame( - value = numeric(), pmf = numeric(), - distribution = factor() - ) - cdf_data <- data.frame( - value = numeric(), cdf = numeric(), - distribution = factor() - ) - variable_id <- 1 - fixed_id <- 1 - group_starts <- c(1L, cumsum(x$np_pmf_length) + 1L) - for (i in 1:x$n) { - if (x$fixed[i] == 0) { - # Uncertain distribution - mean <- x$mean_mean[variable_id] - sd <- x$sd_mean[variable_id] - c_dist <- dist_spec( - mean = mean, sd = sd, max = x$max[variable_id], - distribution = x$dist[variable_id] - ) - pmf <- c_dist$np_pmf - variable_id <- variable_id + 1 - dist_name <- paste0("Uncertain ", x$dist[variable_id], " (ID: ", i, ")") - } else { - # Fixed distribution - pmf <- x$np_pmf[seq(group_starts[i], group_starts[i + 1L] - 1L)] - dist_name <- paste0("Fixed", " (ID: ", i, ")") - fixed_id <- fixed_id + 1 - } - pmf_data <- rbind( - pmf_data, - data.frame( - value = seq_along(pmf), pmf = pmf, distribution = dist_name - ) - ) - cumsum_pmf <- cumsum(pmf) - cdf_data <- rbind( - cdf_data, - data.frame( - value = seq_along(pmf), cdf = cumsum_pmf / sum(pmf), - distribution = dist_name - ) - ) - } - - # Plot PMF and CDF as facets in the same plot - plot <- ggplot() + - aes(x = value, y = pmf) + - geom_col(data = pmf_data) + - geom_step(data = cdf_data, aes(y = cdf)) + - facet_wrap(vars(distribution)) + - labs(x = "Day", y = "Probability density") + - theme_bw() - return(plot) -} - -##' Fix the parameters of a `` object -##' -##' If the given `` has any uncertainty, it is removed and the -##' corresponding distribution converted into a fixed one. -##' @return A `` object without uncertainty -##' @export -##' @param x A `` object -##' @param strategy Character; either "mean" (use the mean estimates of the -##' mean and standard deviation) or "sample" (randomly sample mean and -##' standard deviation from uncertainty given in the `` -##' @importFrom truncnorm rtruncnorm -##' @importFrom rlang arg_match -fix_dist <- function(x, strategy = c("mean", "sample")) { - ## if x is fixed already we don't have to do anything - if (x$fixed) return(x) - ## match startegy argument to options - strategy <- arg_match(strategy) - ## apply stragey depending on choice - if (strategy == "mean") { - x <- dist_spec( - mean = c(x$mean_mean), - sd = c(x$sd_mean), - mean_sd = 0, - sd_sd = 0, - distribution = x$dist, - max = c(x$max) - ) - } else if (strategy == "sample") { - lower_bound <- ifelse(x$dist == "gamma", 0, -Inf) - mean <- rtruncnorm( - n = 1, a = lower_bound, mean = x$mean_mean, sd = x$mean_sd - ) - sd <- rtruncnorm(n = 1, a = 0, mean = x$sd_mean, sd = x$mean_sd) - x <- dist_spec( - mean = mean, - sd = sd, - mean_sd = 0, - sd_sd = 0, - distribution = x$dist, - max = c(x$max) - ) - } - return(x) -} diff --git a/R/dist_spec.R b/R/dist_spec.R new file mode 100644 index 000000000..a1d1645b8 --- /dev/null +++ b/R/dist_spec.R @@ -0,0 +1,1117 @@ +#' Distribution Skeleton +#' +#' @description `r lifecycle::badge("questioning")` +#' This function acts as a skeleton for a truncated distribution defined by +#' model type, maximum value and model parameters. It is designed to be used +#' with the output from [get_dist()]. +#' +#' @param n Numeric vector, number of samples to take (or days for the +#' probability density). +#' +#' @param dist Logical, defaults to `FALSE`. Should the probability density be +#' returned rather than a number of samples. +#' +#' @param cum Logical, defaults to `TRUE`. If `dist = TRUE` should the returned +#' distribution be cumulative. +#' +#' @param model Character string, defining the model to be used. Supported +#' options are exponential ("exp"), gamma ("gamma"), and log normal +#' ("lognormal") +#' +#' @param discrete Logical, defaults to `FALSE`. Should the probability +#' distribution be discretised. In this case each entry of the probability +#' mass function corresponds to the 1-length interval ending at the entry, +#' i.e. the probability mass function is a vector where the first entry +#' corresponds to the integral over the (0,1] interval of the continuous +#' distribution, the second entry corresponds to the (1,2] interval etc. +#' +#' @param params A list of parameters values (by name) required for each model. +#' For the exponential model this is a rate parameter and for the gamma model +#' this is alpha and beta. +#' +#' @param max_value Numeric, the maximum value to allow. Defaults to 120. +#' Samples outside of this range are resampled. +#' +#' @return A vector of samples or a probability distribution. +#' @export +#' @examples +#' +#' ## Exponential model +#' # sample +#' dist_skel(10, model = "exp", params = list(rate = 1)) +#' +#' # cumulative prob density +#' dist_skel(1:10, model = "exp", dist = TRUE, params = list(rate = 1)) +#' +#' # probability density +#' dist_skel(1:10, +#' model = "exp", dist = TRUE, +#' cum = FALSE, params = list(rate = 1) +#' ) +#' +#' ## Gamma model +#' # sample +#' dist_skel(10, model = "gamma", params = list(shape = 1, rate = 0.5)) +#' +#' # cumulative prob density +#' dist_skel(0:10, +#' model = "gamma", dist = TRUE, +#' params = list(shape = 1, rate = 0.5) +#' ) +#' +#' # probability density +#' dist_skel(0:10, +#' model = "gamma", dist = TRUE, +#' cum = FALSE, params = list(shape = 2, rate = 0.5) +#' ) +#' +#' ## Log normal model +#' # sample +#' dist_skel(10, +#' model = "lognormal", params = list(meanlog = log(5), sdlog = log(2)) +#' ) +#' +#' # cumulative prob density +#' dist_skel(0:10, +#' model = "lognormal", dist = TRUE, +#' params = list(meanlog = log(5), sdlog = log(2)) +#' ) +#' +#' # probability density +#' dist_skel(0:10, +#' model = "lognormal", dist = TRUE, cum = FALSE, +#' params = list(meanlog = log(5), sdlog = log(2)) +#' ) +dist_skel <- function(n, dist = FALSE, cum = TRUE, model, + discrete = FALSE, params, max_value = 120) { + if (model == "exp") { + # define support functions for exponential dist + rdist <- function(n) { + rexp(n, params[["rate"]]) + } + pdist <- function(n) { + pexp(n, params[["rate"]]) / pexp(max_value, params[["rate"]]) + } + ddist <- function(n) { + (pexp(n + 1, params[["rate"]]) - + pexp(n, params[["rate"]])) / + pexp(max_value + 1, params[["rate"]]) + } + } else if (model == "gamma") { + rdist <- function(n) { + rgamma(n = n, shape = params[["shape"]], rate = params[["rate"]]) + } + pdist <- function(n) { + pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]]) / + pgamma( + q = max_value + 1, shape = params[["shape"]], rate = params[["rate"]] + ) + } + ddist <- function(n) { + (pgamma(q = n + 1, shape = params[["shape"]], rate = params[["rate"]]) - + pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]])) / + pgamma(q = max_value + 1, params[["shape"]], rate = params[["rate"]]) + } + } else if (model == "lognormal") { + rdist <- function(n) { + rlnorm(n, params[["meanlog"]], params[["sdlog"]]) + } + pdist <- function(n) { + plnorm(n, params[["meanlog"]], params[["sdlog"]]) / + plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]]) + } + ddist <- function(n) { + (plnorm(n + 1, params[["meanlog"]], params[["sdlog"]]) - + plnorm(n, params[["meanlog"]], params[["sdlog"]])) / + plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]]) + } + } else if (model == "normal") { + rdist <- function(n) { + rnorm(n, params[["mean"]], params[["sd"]]) + } + pdist <- function(n) { + pnorm(n, params[["mean"]], params[["sd"]]) / + pnorm(max_value + 1, params[["mean"]], params[["sd"]]) + } + ddist <- function(n) { + (pnorm(n + 1, params[["mean"]], params[["sd"]]) - + pnorm(n, params[["mean"]], params[["sd"]])) / + pnorm(max_value + 1, params[["mean"]], params[["sd"]]) + } + } else if (model == "fixed") { + rdist <- function(n) { + rep(params[["value"]], n) + } + pdist <- function(n) { + as.integer(n > params[["value"]]) + } + ddist <- function(n) { + as.integer(n == params[["value"]]) + } + } + + if (discrete) { + cmf <- c(0, pdist(seq_len(max_value + 1))) + pmf <- diff(cmf) + rdist <- function(n) { + sample( + x = seq_len(max_value + 1) - 1, size = n, prob = pmf, replace = TRUE + ) + } + pdist <- function(n) { + cmf[n + 1] + } + ddist <- function(n) { + pmf[n + 1] + } + } + + # define internal sampling function + inner_skel <- function(n, dist = FALSE, cum = TRUE, max_value = NULL) { + if (dist) { + if (cum) { + ret <- pdist(n) + } else { + ret <- ddist(n) + } + ret[ret > 1] <- NA_real_ + return(ret) + } else { + rdist(n) + } + } + + # define truncation wrapper + truncated_skel <- function(n, dist, cum, max_value) { + n <- inner_skel(n, dist, cum, max_value) + if (!dist) { + while (any(!is.na(n) & n >= max_value)) { + n <- ifelse(n >= max_value, inner_skel(n), n) + } + + n <- as.integer(n) + } + return(n) + } + + # call function + sample <- truncated_skel(n, dist = dist, cum = cum, max_value = max_value) + return(sample) +} + +#' Creates a delay distribution as the sum of two other delay distributions. +#' +#' @return A delay distribution representing the sum of the two delays + +#' @param e1 The first delay distribution (of type [dist_spec()]) to +#' combine. +#' +#' @param e2 The second delay distribution (of type [dist_spec()]) to +#' combine. +#' @method + dist_spec +#' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist1 <- LogNormal( +#' meanlog = 1.6, sdlog = 1, max = 20 +#' ) +#' dist1 + dist1 +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' dist1 + dist2 +`+.dist_spec` <- function(e1, e2) { + c(e1, e2) +} + +#' Combines multiple delay distributions for further processing +#' +#' This combines the parameters so that they can be fed as multiple delay +#' distributions to [epinow()] or [estimate_infections()]. +#' +#' @param ... The delay distributions (from calls to [dist_spec()]) to combine +#' @return Combined delay distributions (with class ``) +#' @method c dist_spec +#' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist1 <- LogNormal( +#' meanlog = 1.6, sdlog = 1, max = 20 +#' ) +#' dist1 + dist1 +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' c(dist1, dist2) +c.dist_spec <- function(...) { + ## process delay distributions + dist_specs <- list(...) + if (!(all(vapply(dist_specs, is, FALSE, "dist_spec")))) { + stop( + "Distribution can only be concatenated with other delay ", + "distributions." + ) + } + dist_specs <- do.call(c, lapply(dist_specs, unclass)) + attr(dist_specs, "class") <- c("dist_spec", "list") + return(dist_specs) +} + +#' Returns the mean of one or more delay distribution +#' +#' This works out the mean of all the (parametric / nonparametric) delay +#' distributions combined in the passed [dist_spec()] (ignoring any uncertainty +#' in parameters) +#' +#' @param x The `` to use +#' @param ... Not used +#' @param ignore_uncertainty Logical; whether to ignore any uncertainty in +#' parameters. If set to FALSE (the default) then the mean of any uncertain +#' parameters will be returned as NA. +#' @method mean dist_spec +#' @importFrom utils head +#' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist1 <- LogNormal(mean = 5, sd = 1, max = 20) +#' mean(dist1) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' mean(dist2) +#' +#' # The mean of the sum of two distributions +#' mean(dist1 + dist2) +mean.dist_spec <- function(x, ..., ignore_uncertainty = FALSE) { + ret <- vapply(x, function(y) { + if (is.numeric(y)) { + return(y) + } + ## y is a dist_spec + if (y$distribution == "nonparametric") { + ## nonparametric + return(sum((seq_along(y$pmf) - 1) * y$pmf)) + } else { + if (!all(vapply(y$parameters, is.numeric, logical(1)))) { + if (ignore_uncertainty) { + y$parameters <- lapply(y$parameters, mean, ignore_uncertainty = TRUE) + } else { + return(NA_real_) + } + } + if (y$distribution == "lognormal") { + return(exp(y$parameters$meanlog + y$parameters$sdlog**2 / 2)) + } else if (y$distribution == "gamma") { + return(y$parameters$shape / y$parameters$rate) + } else if (y$distribution == "normal") { + return(y$parameters$mean) + } else if (y$distribution == "fixed") { + return(y$parameters$value) + } else { + stop( + "Don't know how to calculate mean of ", y$distribution, + " distribution." + ) + } + } + }, numeric(1)) + return(ret) +} + +#' Returns the standard deviation of one or more delay distribution +#' +#' This works out the standard deviation of all the (parametric / +#' nonparametric) delay distributions combined in the passed [dist_spec()]. +#' +#' @param x The [dist_spec()] to use +#' @return A vector of standard deviations. +#' @importFrom utils head +#' @keywords internal +#' @examples +#' \dontrun{ +#' # A fixed lognormal distribution with sd 5 and sd 1. +#' dist1 <- LogNormal(mean = 5, sd = 1, max = 20) +#' sd_dist(dist1) +#' +#' # A gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma(mean = 3, sd = 2) +#' sd_dist(dist2) +#' +#' # The sd of the sum of two distributions +#' sd_dist(dist1 + dist2) +#' } +sd_dist <- function(x) { + ret <- vapply(x, function(y) { + if (is.numeric(y)) { + return(0) + } + ## y is a dist_spec + if (y$distribution == "nonparametric") { + ## nonparametric + mean_pmf <- sum((seq_along(y$pmf) - 1) * y$pmf) + return(sum((seq_along(y$pmf) - 1)**2 * y$pmf) - mean_pmf^2) + } else { + ## parametric + if (!all(vapply(y$parameters, is.numeric, logical(1)))) { + return(NA_real_) + } + if (y$distribution == "lognormal") { + sqrt(exp(y$parameters$sdlog**2) - 1) * + exp(y$parameters$meanlog + 0.5 * y$parameters$sdlog**2) + } else if (y$distribution == "gamma") { + sqrt(y$parameters$shape / y$parameters$rate**2) + } else if (y$distribution == "normal") { + y$parameters$sd + } else if (y$distribution == "fixed") { + 0 + } else { + stop( + "Don't know how to calculate standard deviation of ", + y$distribution, " distribution." + ) + } + } + }, numeric(1)) + return(ret) +} + +#' Returns the maximum of one or more delay distribution +#' +#' This works out the maximum of all the (parametric / nonparametric) delay +#' distributions combined in the passed [dist_spec()] (ignoring any uncertainty +#' in parameters) +#' +#' @param x The [dist_spec()] to use +#' @param ... Not used +#' @return A vector of means. +#' @method max dist_spec +#' @export +#' @examples +#' # A fixed gamma distribution with mean 5 and sd 1. +#' dist1 <- Gamma(mean = 5, sd = 1, max = 20) +#' max(dist1) +#' +#' # An uncertain lognormal distribution with mean 3 and sd 2 +#' dist2 <- LogNormal(mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20) +#' max(dist2) +#' +#' # The max the sum of two distributions +#' max(dist1 + dist2) +max.dist_spec <- function(x, ...) { + ret <- vapply(x, function(y) { + ## y is a dist_spec + if (y$distribution == "nonparametric") { + ## nonparametric + return(length(y$pmf) - 1) + } else if (y$distribution == "fixed") { + return(y$parameters$value) + } else { + return(y$max) + } + }, numeric(1)) + return(ret) +} + +#' Discretise a +#' +#' By default it will discretise all the distributions it can discretise +#' (i.e. those with finite support and constant parameters). +#' @title Discretise a +#' @param x A `` +#' @param silent Logical; if `TRUE` then any distribution that can't be +#' discretised will be returned as is. If `FALSE` then an error will be +#' thrown. +#' @return A `` where all distributions with constant parameters are +#' nonparametric. +#' @export +#' @examples +#' # A fixed gamma distribution with mean 5 and sd 1. +#' dist1 <- Gamma(mean = 5, sd = 1, max = 20) +#' +#' # An uncertain lognormal distribution with mean 3 and sd 2 +#' dist2 <- LogNormal(mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20) +#' +#' # The maxf the sum of two distributions +#' discretise(dist1 + dist2) +discretise <- function(x, silent = TRUE) { + if (!is(x, "dist_spec")) { + stop("Can only discretise a .") + } + ## check max + max_x <- max(x) + if (any(is.infinite(max_x)) && !silent) { + stop("Cannot discretise a distribution with infinite support.") + } + ## discretise + ret <- lapply(seq_along(x), function(id) { + y <- x[[id]] + if (y$distribution == "nonparametric") { + return(y) + } else { + if (all(vapply(y$parameters, is.numeric, logical(1))) && + is.finite(max_x[id])) { + z <- list(pmf = dist_skel( + n = seq_len(max_x[id] + 1) - 1, dist = TRUE, cum = FALSE, + model = y$distribution, params = y$parameters, + max_value = max_x[id], discrete = TRUE + )) + z$distribution <- "nonparametric" + return(z) + } else if (silent) { + return(y) + } else { + stop( + "Cannot discretise a distribution with uncertain parameters." + ) + } + } + }) + attr(ret, "class") <- c("dist_spec", "list") + return(ret) +} +#' @rdname discretise +#' @export +discretize <- discretise + +#' Collapse nonparametric distributions in a +#' +#' This convolves any consecutive nonparametric distributions contained +#' in the . +#' @param x A `` +#' @return A `` where consecutive nonparametric distributions +#' have been convolved +#' @importFrom stats convolve +#' @export +#' @examples +#' # A fixed gamma distribution with mean 5 and sd 1. +#' dist1 <- Gamma(mean = 5, sd = 1, max = 20) +#' +#' # An uncertain lognormal distribution with mean 3 and sd 2 +#' dist2 <- LogNormal(mean = 3, sd = 2, max = 20) +#' +#' # The maxf the sum of two distributions +#' collapse(discretise(dist1 + dist2)) +collapse <- function(x) { + if (!is(x, "dist_spec")) { + stop("Can only convolve distributions in a .") + } + ## get nonparametric distributions + nonparametric <- unname(unlist(map(x, "distribution"))) == "nonparametric" + ## find consecutive nonparametric distributions + consecutive <- rle(nonparametric) + ids <- unique(c(1, cumsum(consecutive$lengths[-length(consecutive$lengths)]))) + ## find ids of nonparametric distributions that are collapsable + ## (i.e. have other nonparametric distributions followign them) + collapseable <- ids[consecutive$values & (consecutive$length > 1)] + ## identify ids of distributions that follow the collapseable distributions + next_ids <- lapply(collapseable, function(id) { + ids[id] + seq_len(consecutive$lengths[id] - 1) + }) + for (id in collapseable) { + ## collapse distributions + for (next_id in next_ids[id]) { + x[[ids[id]]]$pmf <- convolve( + x[[ids[id]]]$pmf, rev(x[[next_id]]$pmf), type = "open" + ) + } + } + ## remove collapsed pmfs + x[unlist(next_ids)] <- NULL + + return(x) +} + +#' Applies a threshold to all nonparametric distributions in a +#' +#' This removes any part of the tail of the nonparametric distributions in the +#' where the probability mass is below the threshold level. +#' @param x A `` +#' @param tolerance Numeric; the desired tolerance level. +#' @return A `` where probability masses below the threshold level +#' have been removed +#' @export +#' @examples +#' dist <- discretise(Gamma(mean = 5, sd = 1, max = 20)) +#' apply_tolerance(dist, 0.01) +apply_tolerance <- function(x, tolerance) { + if (!is(x, "dist_spec")) { + stop("Can only apply tolerance to distributions in a .") + } + x <- lapply(x, function(x) { + if (x$distribution == "nonparametric") { + cmf <- cumsum(x$pmf) + new_pmf <- x$pmf[c(TRUE, (1 - cmf[-length(cmf)]) >= tolerance)] + x$pmf <- new_pmf / sum(new_pmf) + return(x) + } else { + return(x) + } + }) + + attr(x, "class") <- c("dist_spec", "list") + return(x) +} + +#' Prints the parameters of one or more delay distributions +#' +#' This displays the parameters of the uncertain and probability mass +#' functions of fixed delay distributions combined in the passed [dist_spec()]. +#' @param x The `` to use +#' @param ... Not used +#' @return invisible +#' @method print dist_spec +#' @export +#' @examples +#' #' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist1 <- LogNormal(mean = 1.5, sd = 0.5, max = 20) +#' print(dist1) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' print(dist2) +print.dist_spec <- function(x, ...) { + .print.dist_spec(x, indent = 0, ...) +} + +.print.dist_spec <- function(x, indent, ...) { + indent_str <- strrep(" ", indent) + if (length(x) > 1) { + cat(indent_str, "Composite distribution:\n", sep = "") + } + for (i in seq_along(x)) { + if (x[[i]]$distribution == "nonparametric") { + ## nonparametric + cat( + indent_str, "- nonparametric distribution\n", indent_str, " PMF: [", + paste(signif(x[[i]]$pmf, digits = 2), collapse = " "), "]\n", + sep = "" + ) + } else if (x[[i]]$distribution == "fixed") { + ## fixed + cat(indent_str, "- fixed value:\n", sep = "") + if (is.numeric(x[[i]]$parameters$value)) { + cat(indent_str, " ", x[[i]]$parameters$value, "\n", sep = "") + } else { + .print.dist_spec(x[[i]]$parameters$value, indent = indent + 4) + } + } else { + ## parametric + cat(indent_str, "- ", x[[i]]$distribution, " distribution", sep = "") + if (is.finite(x[[i]]$max)) { + cat(" (max: ", x[[i]]$max, ")", sep = "") + } + cat(":\n") + ## loop over natural parameters and print + for (param in names(x[[i]]$parameters)) { + cat( + indent_str, " ", param, ":\n", sep = "" + ) + if (is.numeric(x[[i]]$parameters[[param]])) { + cat( + indent_str, " ", + signif(x[[i]]$parameters[[param]], digits = 2), "\n", + sep = "" + ) + } else { + .print.dist_spec(x[[i]]$parameters[[param]], indent = indent + 4) + } + } + } + } +} + +#' Plot PMF and CDF for a dist_spec object +#' +#' This function takes a `` object and plots its probability mass +#' function (PMF) and cumulative distribution function (CDF) using `{ggplot2}`. +#' Note that currently uncertainty in distributions is not plot. +#' +#' @param x A `` object +#' @param ... Additional arguments to pass to `{ggplot}`. +#' @importFrom ggplot2 aes geom_col geom_step facet_wrap vars theme_bw +#' @export +#' @examples +#' #' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist1 <- LogNormal(mean = 1.6, sd = 0.5, max = 20) +#' plot(dist1) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' plot(dist2) +#' +#' # Multiple distributions +#' plot(dist1 + dist2 + dist1) +#' +#' # A combination of the two fixed distributions +#' plot(dist1 + dist1) +plot.dist_spec <- function(x, ...) { + distribution <- cdf <- NULL + # Get the PMF and CDF data + pmf_data <- data.frame( + value = numeric(), pmf = numeric(), + distribution = factor() + ) + cdf_data <- data.frame( + value = numeric(), cdf = numeric(), + distribution = factor() + ) + dist_sd <- sd_dist(x) + for (i in seq_along(x)) { + if (x[[i]]$distribution == "nonparametric") { + # Fixed distribution + pmf <- x[[i]]$pmf + dist_name <- paste0("Nonparametric", " (ID: ", i, ")") + } else { + # Uncertain distribution + c_dist <- discretise(fix_dist(extract_single_dist(x, i))) + pmf <- c_dist[[1]]$pmf + dist_name <- paste0( + ifelse(is.na(dist_sd[i]), "Uncertain ", ""), + x[[i]]$distribution, " (ID: ", i, ")" + ) + } + pmf_data <- rbind( + pmf_data, + data.frame( + value = seq_along(pmf), pmf = pmf, distribution = dist_name + ) + ) + cumsum_pmf <- cumsum(pmf) + cdf_data <- rbind( + cdf_data, + data.frame( + value = seq_along(pmf), cdf = cumsum_pmf / sum(pmf), + distribution = dist_name + ) + ) + } + + # Plot PMF and CDF as facets in the same plot + plot <- ggplot() + + aes(x = value, y = pmf) + + geom_col(data = pmf_data) + + geom_step(data = cdf_data, aes(y = cdf)) + + facet_wrap(vars(distribution)) + + labs(x = "Day", y = "Probability density") + + theme_bw() + return(plot) +} + +#' Extract a single element of a composite `` +#' +#' @param x A composite `dist_spec` object +#' @param i The index to extract +#' @return A single `dist_spec` object +#' @keywords internal +#' @examples +#' dist1 <- LogNormal(mean = 1.6, sd = 0.5, max = 20) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist2 <- Gamma( +#' mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +#' ) +#' +#' # Multiple distributions +#' \dontrun{ +#' dist <- dist1 + dist2 +#' extract_single_dist(dist, 2) +#' } +extract_single_dist <- function(x, i) { + if (i > length(x)) { + stop("i can't be greater than the number of distributions.") + } + ret <- list(x[[i]]) + attr(ret, "class") <- c("dist_spec", class(ret)) + return(ret) +} + +#' Fix the parameters of a `` +#' +#' If the given `` has any uncertainty, it is removed and the +#' corresponding distribution converted into a fixed one. +#' @return A `` object without uncertainty +#' @export +#' @param x A `` +#' @param strategy Character; either "mean" (use the mean estimates of the +#' mean and standard deviation) or "sample" (randomly sample mean and +#' standard deviation from uncertainty given in the `` +#' @importFrom truncnorm rtruncnorm +#' @importFrom rlang arg_match +#' @examples +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist <- LogNormal( +#' meanlog = Normal(3, 0.5), sdlog = Normal(2, 0.5), max = 20 +#' ) +#' +#' fix_dist(dist) +fix_dist <- function(x, strategy = c("mean", "sample")) { + if (!is(x, "dist_spec")) { + stop("Can only fix distributions in a .") + } + ## match strategy argument to options + strategy <- arg_match(strategy) + + ret <- lapply(x, function(x) { + ## if x is fixed already we don't have to do anything + if ( + x$distribution == "nonparametric" || + all(vapply(x$parameters, is.numeric, logical(1))) + ) { + return(x) + } + ## apply strategy depending on choice + if (strategy == "mean") { + x$parameters <- lapply(x$parameters, mean) + } else if (strategy == "sample") { + lower_bound <- + lower_bounds(x$distribution)[natural_params(x$distribution)] + mean <- as.list(rtruncnorm( + n = 1, a = lower_bound, + mean = vapply(x$parameters, mean, numeric(1)), + sd = vapply(x$parameters, sd_dist, numeric(1)) + )) + names(mean) <- names(x$parameters) + x$parameters <- mean + } + return(x) + }) + + attr(ret, "class") <- c("dist_spec", "list") + return(ret) +} + +#' @details +#' Probability distributions are ubiquitous in EpiNow2, usually representing +#' epidemiological delays (e.g., the generation time for delays between +#' becoming infecting and infecting others; or reporting delays) +#' +#' They are generated using functions that have a name corresponding to the +#' probability distribution that is being used. They generated `dist_spec` +#' objects that are then passed to the models underlying EpiNow2. +## +#' All parameters can be given either as fixed values (a numeric value) or as +#' uncertain values (a `dist_sepc`). If given as uncertain values, currently +#' only normally distributed parameters (generated using `Normal()`) are +#' supported. +#' +#' Each distribution has a representation in terms of "natural" parameters (the +#' ones used in stan) but can sometimes also be specified using other +#' parameters such as the mean or standard deviation of the distribution. If +#' not given as natural parameters then these will be calculated from the given +#' parameters. If they have uncertainty, this will be done by random sampling +#' from the given uncertainty and converting resulting parameters to their +#' natural representation. +#' +#' Currently available distributions are lognormal, gamma, normal, fixed +#' (delta) and nonparametric. The nonparametric is a special case where the +#' probability mass function is given directly as a numeric vector. +#' +#' @inheritParams stats::Lognormal +#' @param mean,sd mean and standard deviation of the distribution +#' @param max Numeric, maximum value of the distribution. The distribution will +#' be truncated at this value. Default: `Inf`, i.e. no maximum. +#' @return A `dist_spec` representing a distribution of the given +#' specification. +#' @export +#' @rdname Distributions +#' @name Distributions +#' @order 1 +#' @examples +#' LogNormal(mean = 4, sd = 1) +#' LogNormal(mean = 4, sd = 1, max = 10) +#' LogNormal(mean = Normal(4, 1), sd = 1, max = 10) +LogNormal <- function(meanlog, sdlog, mean, sd, max = Inf) { + params <- as.list(environment()) + return(new_dist_spec(params, "lognormal")) +} + +#' @inheritParams stats::GammaDist +#' @rdname Distributions +#' @title Probability distributions +#' @order 2 +#' @export +#' @examples +#' Gamma(mean = 4, sd = 1) +#' Gamma(shape = 16, rate = 4) +#' Gamma(shape = Normal(16, 2), rate = Normal(4, 1)) +#' Gamma(shape = Normal(16, 2), scale = Normal(1/4, 1)) +Gamma <- function(shape, rate, scale, mean, sd, max = Inf) { + params <- as.list(environment()) + return(new_dist_spec(params, "gamma")) +} + +#' @rdname Distributions +#' @order 3 +#' @export +#' @examples +#' Normal(mean = 4, sd = 1) +#' Normal(mean = 4, sd = 1, max = 10) +Normal <- function(mean, sd, max = Inf) { + params <- as.list(environment()) + return(new_dist_spec(params, "normal")) +} + +#' @rdname Distributions +#' @order 4 +#' @param value Value of the fixed (delta) distribution +#' @export +#' @examples +#' Fixed(value = 3) +#' Fixed(value = 3.5) +Fixed <- function(value, max = Inf) { + params <- as.list(environment()) + return(new_dist_spec(params, "fixed")) +} + +#' Generates a nonparametric distribution. +#' +#' @param pmf Probability mass of the given distribution; this is +#' passed as a zero-indexed numeric vector (i.e. the fist entry represents +#' the probability mass of zero). If not summing to one it will be normalised +#' to sum to one internally. +#' @rdname Distributions +#' @order 5 +#' @export +#' @examples +#' NonParametric(c(0.1, 0.3, 0.2, 0.4)) +#' NonParametric(c(0.1, 0.3, 0.2, 0.1, 0.1)) +NonParametric <- function(pmf) { + params <- list(pmf = pmf / sum(pmf)) + return(new_dist_spec(params, "nonparametric")) +} + +#' Get the names of the natural parameters of a distribution +#' +#' These are the parameters used in the stan models. All other parameter +#' representations are converted to these using [convert_to_natural()] before +#' being passed to the stan models. +#' @param distribution Character; the distribution to use. +#' @return A character vector, the natural parameters. +#' @keywords internal +#' @examples +#' \dontrun{ +#' natural_params("gamma") +#' } +natural_params <- function(distribution) { + if (distribution == "gamma") { + ret <- c("shape", "rate") + } else if (distribution == "lognormal") { + ret <- c("meanlog", "sdlog") + } else if (distribution == "normal") { + ret <- c("mean", "sd") + } else if (distribution == "fixed") { + ret <- "value" + } + return(ret) +} + +#' Get the lower bounds of the parameters of a distribution +#' +#' This is used to avoid sampling parameter values that have no support. +#' @return A numeric vector, the lower bounds. +#' @inheritParams natural_params +#' @keywords internal +#' @examples +#' \dontrun{ +#' lower_bounds("lognormal") +#' } +lower_bounds <- function(distribution) { + if (distribution == "gamma") { + ret <- c(shape = 0, rate = 0, scale = 0, mean = 0, sd = 0) + } else if (distribution == "lognormal") { + ret <- c(meanlog = -Inf, sdlog = 0, mean = 0, sd = 0) + } else if (distribution == "normal") { + ret <- c(mean = -Inf, sd = 0) + } else if (distribution == "fixed") { + ret <- c(value = 1) + } + return(ret) +} + +#' Internal function for extracting given parameter names of a distribution +#' from the environment. Called by `new_dist_spec` +#' +#' @param params Given parameters (obtained using `as.list(environment())`) +#' @return A character vector of parameters and their values. +#' @inheritParams natural_params +#' @keywords internal +extract_params <- function(params, distribution) { + params <- params[!vapply(params, inherits, "name", FUN.VALUE = TRUE)] + n_params <- length(natural_params(distribution)) + if (length(params) != n_params) { + stop( + "Exactly ", n_params, " parameters of the ", distribution, + " distribution must be specified." + ) + } + return(params) +} + +#' Internal function for generating a `dist_spec` given parameters and a +#' distribution. +#' +#' This will convert all parameters to natural parameters before generating +#' a `dist_spec`. If they have uncertainty this will be done using sampling. +#' @param params Parameters of the distribution (including `max`) +#' @inheritParams extract_params +#' @importFrom purrr walk +#' @return A `dist_spec` of the given specification. +#' @keywords internal +#' @examples +#' \dontrun{ +#' new_dist_spec( +#' params = list(mean = 2, sd = 1, max = Inf), +#' distribution = "normal" +#' ) +#' } +new_dist_spec <- function(params, distribution) { + if (distribution == "nonparametric") { + ## nonparametric distribution + ret <- list( + pmf = params$pmf, + distribution = "nonparametric" + ) + } else { + ## process min/max first + max <- params$max + params$max <- NULL + ## extract parameters and convert all to dist_spec + params <- extract_params(params, distribution) + ## fixed distribution + if (distribution == "fixed") { + ret <- list( + parameters = params, + distribution = "fixed" + ) + } else { + ## parametric probability distribution + ## check bounds + for (param_name in names(params)) { + lb <- lower_bounds(distribution)[param_name] + if (is.numeric(params[[param_name]]) && params[[param_name]] < lb) { + stop( + "Parameter ", param_name, " is less than its lower bound ", lb, + "." + ) + } else if ( + is(params[[param_name]], "dist") && params[[param_name]]$max < lb + ) { + stop( + "Maximum of parameter ", param_name, " is less than its ", + "lower bound ", lb, "." + ) + } + } + + ## convert any unnatural parameters + unnatural_params <- setdiff(names(params), natural_params(distribution)) + if (length(unnatural_params) > 0) { + ## sample parameters if they are uncertain + if (any(vapply(params, sd_dist, numeric(1)) > 0)) { + warning( + "Uncertain ", distribution, " distribution specified in terms of ", + "parameters that are not the \"natural\" parameters of the ", + "distribution (", toString(natural_params(distribution)), + "). Converting using a crude and very approximate method ", + "that is likely to produce biased results. If possible, ", + "it is preferable to specify the distribution directly ", + "in terms of the natural parameters." + ) + } + ## generate natural parameters + params <- convert_to_natural(params, distribution) + } + ## convert normal with sd == 0 to fixed + if (distribution == "normal" && is.numeric(params$sd) && params$sd == 0) { + ret <- list( + parameters = list(value = params$mean), distribution = "fixed" + ) + } else { + ret <- list(parameters = params, distribution = distribution) + } + } + ret <- c(ret, list(max = max)) + } + ## join and wrap in another list to make concatenating easier + ret <- list(ret) + attr(ret, "class") <- c("dist_spec", "list") + + ## now we have a distribution with natural parameters - return dist_spec + return(ret) +} + +#' Internal function for converting parameters to natural parameters. +#' +#' This is used for preprocessing before generating a `dist_spec` object +#' from a given set of parameters and distribution +#' @param params A numerical named parameter vector +#' @inheritParams natural_params +#' @return A list with two elements, `params_mean` and `params_sd`, containing +#' mean and sd of natural parameters. +#' @keywords internal +#' @examples +#' \dontrun{ +#' convert_to_natural( +#' params = list(mean = 2, sd = 1, max = Inf), +#' distribution = "gamma" +#' ) +#' } +convert_to_natural <- function(params, distribution) { + ## unnatural parameter means + ux <- lapply(params, mean) + ## estimate relative uncertainty of parameters + rel_unc <- mean(vapply(params, sd_dist, numeric(1))^2 / unlist(ux)) + ## store natural parameters + x <- list() + if (distribution == "gamma") { + ## given as mean and sd + if ("mean" %in% names(ux) && "sd" %in% names(ux)) { + x$shape <- ux$mean**2 / ux$sd**2 + x$rate <- x$shape / ux$mean + } else { + ## convert scale => rate + if ("scale" %in% names(ux)) { + x$rate <- 1 / ux$scale + } else { + x$rate <- ux$rate + } + x$shape <- ux$shape + } + } else if (distribution == "lognormal") { + if ("mean" %in% names(params) && "sd" %in% names(params)) { + x$meanlog <- log(ux$mean^2 / sqrt(ux$sd^2 + ux$mean^2)) + x$sdlog <- convert_to_logsd(ux$mean, ux$sd) + } else { + x$meanlog <- ux$meanlog + x$sdlog <- ux$sdlog + } + } + ## sort + x <- x[natural_params(distribution)] + if (anyNA(names(x))) { + stop( + "Incompatible combination of parameters of a ", distribution, + " distribution specified:\n ", toString(names(params)), + "." + ) + } + if (rel_unc > 0) { + params <- lapply(names(x), function(param_name) { + Normal(mean = x[[param_name]], sd = sqrt(abs(x[[param_name]]) * rel_unc)) + }) + names(params) <- names(x) + } else { + params <- x + } + return(params) +} diff --git a/R/epinow.R b/R/epinow.R index 827fb86dd..1db6c1d3d 100644 --- a/R/epinow.R +++ b/R/epinow.R @@ -42,30 +42,21 @@ #' #' # set an example generation time. In practice this should use an estimate #' # from the literature or be estimated from data -#' generation_time <- dist_spec( -#' mean = 3.6, -#' mean_sd = 0.7, -#' sd = 3.1, -#' sd_sd = 0.8, +#' generation_time <- Gamma( +#' shape = Normal(1.3, 0.3), +#' rate = Normal(0.37, 0.09), #' max = 14 #' ) #' # set an example incubation period. In practice this should use an estimate #' # from the literature or be estimated from data -#' incubation_period <- dist_spec( -#' mean = 1.6, -#' mean_sd = 0.06, -#' sd = 0.4, -#' sd_sd = 0.07, +#' incubation_period <- LogNormal( +#' meanlog = Normal(1.6, 0.06), +#' sdlog = Normal(0.4, 0.07), #' max = 14 #' ) #' # set an example reporting delay. In practice this should use an estimate #' # from the literature or be estimated from data -#' reporting_delay <- dist_spec( -#' mean = convert_to_logmean(2, 1), -#' sd = convert_to_logsd(2, 1), -#' max = 10, -#' dist = "lognormal" -#' ) +#' reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) #' #' # example case data #' reported_cases <- example_confirmed[1:40] diff --git a/R/estimate_delay.R b/R/estimate_delay.R new file mode 100644 index 000000000..dbb875c1c --- /dev/null +++ b/R/estimate_delay.R @@ -0,0 +1,265 @@ +#' Fit an Integer Adjusted Exponential, Gamma or Lognormal distributions +#' +#' @description `r lifecycle::badge("stable")` +#' Fits an integer adjusted exponential, gamma or lognormal distribution using +#' stan. +#' @param values Numeric vector of values +#' +#' @param samples Numeric, number of samples to take. Must be >= 1000. +#' Defaults to 1000. +#' +#' @param dist Character string, which distribution to fit. Defaults to +#' exponential (`"exp"`) but gamma (`"gamma"`) and lognormal (`"lognormal"`) are +#' also supported. +#' +#' @param cores Numeric, defaults to 1. Number of CPU cores to use (no effect +#' if greater than the number of chains). +#' +#' @param chains Numeric, defaults to 2. Number of MCMC chains to use. More is +#' better with the minimum being two. +#' +#' @param verbose Logical, defaults to FALSE. Should verbose progress messages +#' be printed. +#' +#' @return A stan fit of an interval censored distribution +#' @export +#' @inheritParams stan_opts +#' @examples +#' \donttest{ +#' # integer adjusted exponential model +#' dist_fit(rexp(1:100, 2), +#' samples = 1000, dist = "exp", +#' cores = ifelse(interactive(), 4, 1), verbose = TRUE +#' ) +#' +#' +#' # integer adjusted gamma model +#' dist_fit(rgamma(1:100, 5, 5), +#' samples = 1000, dist = "gamma", +#' cores = ifelse(interactive(), 4, 1), verbose = TRUE +#' ) +#' +#' # integer adjusted lognormal model +#' dist_fit(rlnorm(1:100, log(5), 0.2), +#' samples = 1000, dist = "lognormal", +#' cores = ifelse(interactive(), 4, 1), verbose = TRUE +#' ) +#' } +dist_fit <- function(values = NULL, samples = 1000, cores = 1, + chains = 2, dist = "exp", verbose = FALSE, + backend = "rstan") { + if (samples < 1000) { + samples <- 1000 + warning(sprintf("%s %s", "`samples` must be at least 1000.", + "Now setting it to 1000 internally." + ) + ) + } + # model parameters + lows <- values - 1 + lows <- ifelse(lows <= 0, 1e-6, lows) + ups <- values + 1 + + data <- list( + N = length(values), + low = lows, + up = ups, + lam_mean = numeric(0), + prior_mean = numeric(0), + prior_sd = numeric(0), + par_sigma = numeric(0) + ) + + model <- stan_model(backend, "dist_fit") + + if (dist == "exp") { + data$dist <- 0 + data$lam_mean <- array(mean(values)) + } else if (dist == "lognormal") { + data$dist <- 1 + data$prior_mean <- array(log(mean(values))) + data$prior_sd <- array(log(sd(values))) + } else if (dist == "gamma") { + data$dist <- 2 + data$prior_mean <- array(mean(values)) + data$prior_sd <- array(sd(values)) + data$par_sigma <- array(1.0) + } + + # set adapt delta based on the sample size + if (length(values) <= 30) { + adapt_delta <- 0.999 + } else { + adapt_delta <- 0.9 + } + + # fit model + args <- create_stan_args( + stan = stan_opts( + model, + samples = samples, + warmup = 1000, + control = list(adapt_delta = adapt_delta), + chains = chains, + cores = cores, + backend = backend + ), + data = data, verbose = verbose, model = "dist_fit" + ) + + fit <- fit_model(args, id = "dist_fit") + + return(fit) +} + + +#' Fit a Subsampled Bootstrap to Integer Values and Summarise Distribution +#' Parameters +#' +#' @description `r lifecycle::badge("stable")` +#' Fits an integer adjusted distribution to a subsampled bootstrap of data and +#' then integrates the posterior samples into a single set of summary +#' statistics. Can be used to generate a robust reporting delay that accounts +#' for the fact the underlying delay likely varies over time or that the size +#' of the available reporting delay sample may not be representative of the +#' current case load. +#' +#' @param values Integer vector of values. +#' +#' @param dist Character string, which distribution to fit. Defaults to +#' lognormal (`"lognormal"`) but gamma (`"gamma"`) is also supported. +#' +#' @param verbose Logical, defaults to `FALSE`. Should progress messages be +#' printed. +#' +#' @param samples Numeric, number of samples to take overall from the +#' bootstrapped posteriors. +#' +#' @param bootstraps Numeric, defaults to 1. The number of bootstrap samples +#' (with replacement) of the delay distribution to take. +#' +#' @param bootstrap_samples Numeric, defaults to 100. The number of samples to +#' take in each bootstrap. When the sample size of the supplied delay +#' distribution is less than 100 this is used instead. +#' +#' @param max_value Numeric, defaults to the maximum value in the observed +#' data. Maximum delay to allow (added to output but does impact fitting). +#' +#' @return A `` object summarising the bootstrapped distribution +#' @importFrom purrr list_transpose +#' @importFrom future.apply future_lapply +#' @importFrom rstan extract +#' @importFrom data.table data.table rbindlist +#' @export +#' @examples +#' \donttest{ +#' # lognormal +#' delays <- rlnorm(500, log(5), 1) +#' out <- bootstrapped_dist_fit(delays, +#' samples = 1000, bootstraps = 10, +#' dist = "lognormal" +#' ) +#' out +#' } +bootstrapped_dist_fit <- function(values, dist = "lognormal", + samples = 2000, bootstraps = 10, + bootstrap_samples = 250, max_value, + verbose = FALSE) { + if (!dist %in% c("gamma", "lognormal")) { + stop("Only lognormal and gamma distributions are supported") + } + + if (samples < bootstraps) { + samples <- bootstraps + } + ## Make values integer if not + values <- as.integer(values) + ## Remove NA values + values <- values[!is.na(values)] + ## Filter out negative values + values <- values[values >= 0] + + get_single_dist <- function(values, samples = 1) { + set_dt_single_thread() + + fit <- EpiNow2::dist_fit(values, samples = samples, dist = dist) + + + out <- list() + if (dist == "lognormal") { + out$mean_samples <- sample(extract(fit)$mu, samples) + out$sd_samples <- sample(extract(fit)$sigma, samples) + } else if (dist == "gamma") { + alpha_samples <- sample(extract(fit)$alpha, samples) + beta_samples <- sample(extract(fit)$beta, samples) + out$mean_samples <- alpha_samples / beta_samples + out$sd_samples <- sqrt(alpha_samples) / beta_samples + } + return(out) + } + + if (bootstraps == 1) { + dist_samples <- get_single_dist(values, samples = samples) + } else { + ## Fit each sub sample + dist_samples <- future.apply::future_lapply(1:bootstraps, + function(boot) { + get_single_dist( + sample(values, + min(length(values), bootstrap_samples), + replace = TRUE + ), + samples = ceiling(samples / bootstraps) + ) + }, + future.scheduling = Inf, + future.globals = c( + "values", "bootstraps", "samples", + "bootstrap_samples", "get_single_dist" + ), + future.packages = "data.table", future.seed = TRUE + ) + + + dist_samples <- purrr::list_transpose(dist_samples, simplify = FALSE) + dist_samples <- purrr::map(dist_samples, unlist) + } + + out <- list() + out$mean <- mean(dist_samples$mean_samples) + out$mean_sd <- sd(dist_samples$mean_samples) + out$sd <- mean(dist_samples$sd_sample) + out$sd_sd <- sd(dist_samples$sd_samples) + if (!missing(max_value)) { + out$max <- max_value + } else { + out$max <- max(values) + } + return(do.call(dist_spec, out)) +} + +#' Estimate a Delay Distribution +#' +#' @description `r lifecycle::badge("maturing")` +#' Estimate a log normal delay distribution from a vector of integer delays. +#' Currently this function is a simple wrapper for [bootstrapped_dist_fit()]. +#' +#' @param delays Integer vector of delays +#' +#' @param ... Arguments to pass to internal methods. +#' +#' @return A `` summarising the bootstrapped distribution +#' @export +#' @seealso [bootstrapped_dist_fit()] +#' @examples +#' \donttest{ +#' delays <- rlnorm(500, log(5), 1) +#' estimate_delay(delays, samples = 1000, bootstraps = 10) +#' } +estimate_delay <- function(delays, ...) { + fit <- bootstrapped_dist_fit( + values = delays, + dist = "lognormal", ... + ) + return(fit) +} diff --git a/R/estimate_infections.R b/R/estimate_infections.R index f6f60bda9..b69390ed9 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -76,31 +76,21 @@ #' #' # set an example generation time. In practice this should use an estimate #' # from the literature or be estimated from data -#' generation_time <- dist_spec( -#' mean = 3.6, -#' mean_sd = 0.7, -#' sd = 3.1, -#' sd_sd = 0.8, +#' generation_time <- Gamma( +#' shape = Normal(1.3, 0.3), +#' rate = Normal(0.37, 0.09), #' max = 14 #' ) #' # set an example incubation period. In practice this should use an estimate #' # from the literature or be estimated from data -#' incubation_period <- dist_spec( -#' mean = 1.6, -#' mean_sd = 0.06, -#' sd = 0.4, -#' sd_sd = 0.07, +#' incubation_period <- LogNormal( +#' meanlog = Normal(1.6, 0.06), +#' sdlog = Normal(0.4, 0.07), #' max = 14 #' ) #' # set an example reporting delay. In practice this should use an estimate #' # from the literature or be estimated from data -#' reporting_delay <- dist_spec( -#' mean = convert_to_logmean(2, 1), -#' sd = convert_to_logsd(2, 1), -#' max = 10, -#' dist = "lognormal" -#' ) -#' +#' reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) #' #' # for more examples, see the "estimate_infections examples" vignette #' def <- estimate_infections(reported_cases, @@ -252,7 +242,7 @@ estimate_infections <- function(reported_cases, ) ## Add prior infections - if (delays$n > 0) { + if (length(delays) > 0) { out$prior_infections <- shifted_cases[ , .( diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index bc29dd45d..b12d69816 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -136,9 +136,10 @@ estimate_secondary <- function(reports, secondary = secondary_opts(), delays = delay_opts( - dist_spec( - mean = 2.5, mean_sd = 0.5, - sd = 0.47, sd_sd = 0.25, max = 30 + LogNormal( + meanlog = Normal(2.5, 0.5), + sdlog = Normal(0.47, 0.25), + max = 30 ) ), truncation = trunc_opts(), @@ -238,19 +239,19 @@ estimate_secondary <- function(reports, #' #' @description `r lifecycle::badge("stable")` #' This functions allows the user to more easily specify data driven or model -#' based priors for [estimate_secondary()] from example from previous model fits -#' using a `` to overwrite other default settings. Note that default -#' settings are still required. +#' based priors for [estimate_secondary()] from example from previous model +#' fits using a `` to overwrite other default settings. Note that +#' default settings are still required. #' #' @param data A list of data and arguments as returned by `create_stan_data()`. #' #' @param priors A `` of named priors to be used in model fitting -#' rather than the defaults supplied from other arguments. This is typically -#' useful if wanting to inform a estimate from the posterior of another model -#' fit. Priors that are currently use to update the defaults are the scaling -#' fraction ("frac_obs"), the mean delay ("delay_mean"), and standard deviation -#' of the delay ("delay_sd"). The `` should have the following -#' variables: `variable`, `mean`, and `sd`. +#' rather than the defaults supplied from other arguments. This is typically +#' useful if wanting to inform a estimate from the posterior of another model +#' fit. Priors that are currently use to update the defaults are the scaling +#' fraction ("frac_obs"), and delay parameters ("delay_params"). The +#' `` should have the following variables: `variable`, `mean`, and +#' `sd`. #' #' @return A list as produced by `create_stan_data()`. #' @export @@ -275,18 +276,15 @@ update_secondary_args <- function(data, priors, verbose = TRUE) { data$obs_scale_sd <- as.array(signif(scale$sd, 3)) } # replace delay parameters if present - delay_mean <- priors[grepl("delay_mean", variable, fixed = TRUE)] - delay_sd <- priors[grepl("delay_sd", variable, fixed = TRUE)] - if (nrow(delay_mean) > 0) { - if (is.null(data$delay_mean_mean)) { + delay_params <- priors[grepl("delay_params", variable, fixed = TRUE)] + if (nrow(delay_params) > 0) { + if (is.null(data$delay_params_mean)) { warning( "Cannot replace delay distribution parameters as no default has been set" # nolint ) } - data$delay_mean_mean <- as.array(signif(delay_mean$mean, 3)) - data$delay_mean_sd <- as.array(signif(delay_mean$sd, 3)) - data$delay_sd_mean <- as.array(signif(delay_sd$mean, 3)) - data$delay_sd_sd <- as.array(signif(delay_sd$sd, 3)) + data$delay_params_mean <- as.array(signif(delay_params$mean, 3)) + data$delay_params_sd <- as.array(signif(delay_params$sd, 3)) } phi <- priors[grepl("rep_phi", variable, fixed = TRUE)] if (nrow(phi) > 0) { @@ -622,7 +620,7 @@ forecast_secondary <- function(estimate, # allocate empty parameters data <- allocate_empty( - data, c("frac_obs", "delay_mean", "delay_sd", "rep_phi"), + data, c("frac_obs", "delay_param", "rep_phi"), n = data$n ) data$all_dates <- as.integer(all_dates) diff --git a/R/estimate_truncation.R b/R/estimate_truncation.R index 80548616d..f42422620 100644 --- a/R/estimate_truncation.R +++ b/R/estimate_truncation.R @@ -101,11 +101,9 @@ estimate_truncation <- function(obs, max_truncation, trunc_max = 10, trunc_dist = "lognormal", truncation = trunc_opts( - dist_spec( - mean = 0, - mean_sd = 1, - sd = 0, - sd_sd = 1, + LogNormal( + meanlog = Normal(0, 1), + sdlog = Normal(1, 1), max = 10 ) ), @@ -118,7 +116,7 @@ estimate_truncation <- function(obs, max_truncation, trunc_max = 10, if (!is.null(model)) { lifecycle::deprecate_stop( - "2.0.0", + "1.5.0", "estimate_truncation(model)", "estimate_truncation(stan)" ) @@ -180,9 +178,16 @@ estimate_truncation <- function(obs, max_truncation, trunc_max = 10, construct_trunc <- TRUE } if (construct_trunc) { - truncation <- dist_spec( - mean = 0, mean_sd = 1, sd = 0, sd_sd = 1, distribution = trunc_dist, - max = trunc_max + params_mean <- c(0, 1) + params_sd <- c(1, 1) + parameters <- lapply(seq_along(params_mean), function(id) { + Normal(params_mean, params_sd) + }) + names(parameters) <- natural_params(trunc_dist) + parameters$max <- trunc_max + truncation <- new_dist_spec( + params = parameters, + distribution = trunc_dist ) } @@ -214,18 +219,12 @@ estimate_truncation <- function(obs, max_truncation, trunc_max = 10, weight = ifelse(weigh_delay_priors, data$t, 1) )) - ## convert to integer - data$trunc_dist <- - which(eval(formals()[["trunc_dist"]]) == trunc_dist) - 1 - # initial conditions init_fn <- function() { - data <- list( - delay_mean = array(rnorm(1, 0, 1)), - delay_sd = array(abs(rnorm(1, 0, 1))) + 1, + data <- c(create_delay_inits(data), list( phi = abs(rnorm(1, 0, 1)), sigma = abs(rnorm(1, 0, 1)) - ) + )) return(data) } @@ -237,16 +236,15 @@ estimate_truncation <- function(obs, max_truncation, trunc_max = 10, out <- list() # Summarise fit truncation distribution for downstream usage - delay_mean <- extract_stan_param(fit, params = "delay_mean") - delay_sd <- extract_stan_param(fit, params = "delay_sd") - out$dist <- dist_spec( - mean = round(delay_mean$mean, 3), - mean_sd = round(delay_mean$sd, 3), - sd = round(delay_sd$mean, 3), - sd_sd = round(delay_sd$sd, 3), - max = truncation$max - ) - out$dist$dist <- truncation$dist + delay_params <- extract_stan_param(fit, params = "delay_params") + params_mean <- round(delay_params$mean, 3) + params_sd <- round(delay_params$sd, 3) + parameters <- purrr::map(seq_along(params_mean), function(id) { + Normal(params_mean[id], params_sd[id]) + }) + names(parameters) <- natural_params(truncation[[1]]$distribution) + out$dist <- truncation + out$dist[[1]]$parameters <- parameters # summarise reconstructed observations recon_obs <- extract_stan_param(fit, "recon_obs", diff --git a/R/extract.R b/R/extract.R index c0e9abce5..9b1f011e6 100644 --- a/R/extract.R +++ b/R/extract.R @@ -221,20 +221,13 @@ extract_parameter_samples <- function(stan_fit, data, reported_dates, ] } if (data$delay_n_p > 0) { - out$delay_mean <- extract_parameter( - "delay_mean", samples, seq_len(data$delay_n_p) + out$delay_params <- extract_parameter( + "delay_params", samples, seq_len(data$delay_params_length) ) - out$delay_mean <- - out$delay_mean[, strat := as.character(time)][, time := NULL][, + out$delay_params <- + out$delay_params[, strat := as.character(time)][, time := NULL][, date := NULL ] - out$delay_sd <- extract_parameter( - "delay_sd", samples, seq_len(data$delay_n_p) - ) - out$delay_sd <- - out$delay_sd[, strat := as.character(time)][, time := NULL][, - date := NULL - ] } if (data$model_type == 1) { out$reporting_overdispersion <- extract_static_parameter("rep_phi", samples) diff --git a/R/get.R b/R/get.R index 583878a6b..6f272d717 100644 --- a/R/get.R +++ b/R/get.R @@ -158,7 +158,7 @@ get_regional_results <- function(regional_output, #' @description `r lifecycle::badge("deprecated")` #' #' This function has been deprecated. Please specify a distribution -#' using [dist_spec()] instead. +#' using functions such as [Gamma()] or [LogNormal()] instead. #' #' @param data A `` in the format of `generation_times`. #' @@ -177,19 +177,36 @@ get_regional_results <- function(regional_output, #' @export get_dist <- function(data, disease, source, max_value = 14, fixed = FALSE) { lifecycle::deprecate_warn( - "1.5.0", "get_dist()", "dist_spec()" + "1.5.0", "get_dist()", + details = c( + paste( + "Please use distribution functions such as `Gamma()` or `Lognormal()`", + "instead." + ), + "The function will be removed completely in version 2.0.0." + ) ) target_disease <- disease target_source <- source data <- data[disease == target_disease][source == target_source] - dist <- as.list( - data[, .(mean, mean_sd, sd, sd_sd, max = max_value, distribution = dist)] - ) if (fixed) { - dist$mean_sd <- 0 - dist$sd_sd <- 0 + data$sd <- 0 + data$sd_sd <- 0 + } + parameters <- list( + Normal(data$mean, data$mean_sd), + Normal(data$sd, data$sd_sd) + ) + if (data$dist == "gamma") { + names(parameters) <- c("mean", "sd") + } else { + names(parameters) <- c("meanlog", "sdlog") } - return(do.call(dist_spec, dist)) + parameters$max <- max_value + return(new_dist_spec( + params = parameters, + distribution = data$dist + )) } #' Get a Literature Distribution for the Generation Time #' @@ -197,7 +214,7 @@ get_dist <- function(data, disease, source, max_value = 14, fixed = FALSE) { #' #' Extracts a literature distribution from `generation_times`. #' This function has been deprecated. Please specify a distribution -#' using [dist_spec()] instead. +#' using functions such as [Gamma()] or [LogNormal()] instead. #' #' @inheritParams get_dist #' @inherit get_dist @@ -206,10 +223,17 @@ get_dist <- function(data, disease, source, max_value = 14, fixed = FALSE) { get_generation_time <- function(disease, source, max_value = 14, fixed = FALSE) { lifecycle::deprecate_warn( - "1.5.0", "get_generation_time()", "dist_spec()", - paste( - "To obtain the previous estimate by Ganyani et al. (2020) use ", - "`example_generation_time`" + "1.5.0", "get_generation_time()", + details = c( + paste( + "Please use distribution functions such as `Gamma()` or `Lognormal()`", + "instead." + ), + "The function will be removed completely in version 2.0.0.", + paste( + "To obtain the previous estimate by Ganyani et al. (2020) use", + "`example_generation_time`." + ) ) ) dist <- get_dist(EpiNow2::generation_times, @@ -225,19 +249,25 @@ get_generation_time <- function(disease, source, max_value = 14, #' #' Extracts a literature distribution from `generation_times`. #' This function has been deprecated. Please specify a distribution -#' using [dist_spec()] instead +#' using functions such as [Gamma()] or [LogNormal()] instead. #' #' @inheritParams get_dist #' @inherit get_dist #' @export -#' @seealso [dist_spec()] get_incubation_period <- function(disease, source, max_value = 14, fixed = FALSE) { lifecycle::deprecate_warn( - "1.5.0", "get_incubation_period()", "dist_spec()", - paste( - "To obtain the previous estimate by Ganyani et al. (2020) use ", - "`example_incubation_period`" + "1.5.0", "get_incubation_period()", + details = c( + paste( + "Please use distribution functions such as `Gamma()` or `Lognormal()`", + "instead." + ), + "The function will be removed completely in version 2.0.0.", + paste( + "To obtain the previous estimate by Ganyani et al. (2020) use", + "`example_incubation_period`." + ) ) ) dist <- get_dist(EpiNow2::incubation_periods, @@ -289,22 +319,20 @@ get_regions_with_most_reports <- function(reported_cases, ##' ##' The seeding time is set to the mean of the specified delays, constrained ##' to be at least the maximum generation time -##' @param delays Delays as specified using [dist_spec()] -##' @param generation_time Generation time as specified using [dist_spec()] +##' @param delays Delays specified using distribution functions such as +##' [Gamma()] or [LogNormal()] +##' @param generation_time Generation specified using distribution functions +##' such as [Gamma()] or [LogNormal()] ##' @return An integer seeding time get_seeding_time <- function(delays, generation_time) { # Estimate the mean delay ----------------------------------------------- - seeding_time <- sum(mean(delays)) + seeding_time <- sum(mean(delays, ignore_uncertainty = TRUE)) if (seeding_time < 1) { seeding_time <- 1 } else { - seeding_time <- as.integer(seeding_time) + seeding_time <- round(seeding_time) } ## make sure we have at least (length of total gt pmf - 1) seeding time - seeding_time <- max( - seeding_time, - sum(generation_time$max - 1) + sum(generation_time$np_pmf_length) - - length(generation_time$max) - length(generation_time$np_pmf_length) - ) + seeding_time <- max(seeding_time, sum(max(generation_time))) return(seeding_time) } diff --git a/R/opts.R b/R/opts.R index 624827372..3f302f954 100644 --- a/R/opts.R +++ b/R/opts.R @@ -3,9 +3,8 @@ #' @description `r lifecycle::badge("stable")` #' Returns generation time parameters in a format for lower level model use. #' -#' @param dist A delay distribution or series of delay distributions generated -#' using [dist_spec()]. If no distribution is given a fixed generation time of -#' 1 will be assumed. +#' @param dist A delay distribution or series of delay distributions . If no +#' distribution is given a fixed generation time of 1 will be assumed. #' #' @param ... deprecated; use `dist` instead #' @param disease deprecated; use `dist` instead @@ -15,28 +14,33 @@ #' @param prior_weight deprecated; prior weights are now specified as a #' model option. Use the `weigh_delay_priors` argument of #' [estimate_infections()] instead. +#' @inheritParams apply_tolerance #' @return A `` object summarising the input delay #' distributions. #' @seealso [convert_to_logmean()] [convert_to_logsd()] -#' [bootstrapped_dist_fit()] [dist_spec()] +#' [bootstrapped_dist_fit()] [Gamma()] [LogNormal()] [Fixed()] #' @export #' @examples #' # default settings with a fixed generation time of 1 #' generation_time_opts() #' #' # A fixed gamma distributed generation time -#' generation_time_opts(dist_spec(mean = 3, sd = 2, max = 14)) +#' generation_time_opts(Gamma(mean = 3, sd = 2, max = 14)) #' #' # An uncertain gamma distributed generation time #' generation_time_opts( -#' dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 14) +#' Gamma( +#' mean = Normal(mean = 3, sd = 1), +#' sd = Normal(mean = 2, sd = 0.5), +#' max = 14 +#' ) #' ) #' #' # An example generation time #' generation_time_opts(example_generation_time) -generation_time_opts <- function(dist = dist_spec(mean = 1), ..., +generation_time_opts <- function(dist = Fixed(1), ..., disease, source, max = 14, fixed = FALSE, - prior_weight) { + prior_weight, tolerance = 0.001) { deprecated_options_given <- FALSE dot_options <- list(...) @@ -84,8 +88,8 @@ generation_time_opts <- function(dist = dist_spec(mean = 1), ..., } if (deprecated_options_given) { warning( - "The generation time distribution must be given to ", - "`generation_time_opts` using a call to `dist_spec`. ", + "The generation time distribution should be given to ", + "`generation_time_opts` using a `dist_spec`. ", "This behaviour has changed from previous versions of `EpiNow2` and ", "any code using it may need to be updated as any other ways of ", "specifying the generation time are deprecated and will be removed in ", @@ -93,6 +97,8 @@ generation_time_opts <- function(dist = dist_spec(mean = 1), ..., "information, see the relevant documentation pages using ", "`?generation_time_opts`") } + check_stan_delay(dist) + attr(dist, "tolerance") <- tolerance attr(dist, "class") <- c("generation_time_opts", class(dist)) return(dist) } @@ -173,10 +179,11 @@ secondary_opts <- function(type = "incidence", ...) { #' @description `r lifecycle::badge("stable")` #' Returns delay distributions formatted for usage by downstream #' functions. -#' @param dist A delay distribution or series of delay distributions generated -#' using [dist_spec()]. Default is an empty call to [dist_spec()], i.e. no delay +#' @param dist A delay distribution or series of delay distributions. Default is +#' a fixed distribution with all mass at 0, i.e. no delay. #' @param ... deprecated; use `dist` instead #' @param fixed deprecated; use `dist` instead +#' @inheritParams apply_tolerance #' @return A `` object summarising the input delay distributions. #' @seealso [convert_to_logmean()] [convert_to_logsd()] #' [bootstrapped_dist_fit()] [dist_spec()] @@ -186,16 +193,16 @@ secondary_opts <- function(type = "incidence", ...) { #' delay_opts() #' #' # A single delay that has uncertainty -#' delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 14) +#' delay <- LogNormal(mean = Normal(1, 0.2), sd = Normal(0.5, 0.1), max = 14) #' delay_opts(delay) #' #' # A single delay without uncertainty -#' delay <- dist_spec(mean = 1, sd = 0.5, max = 14) +#' delay <- LogNormal(meanlog = 1, sdlog = 0.5, max = 14) #' delay_opts(delay) #' #' # Multiple delays (in this case twice the same) #' delay_opts(delay + delay) -delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { +delay_opts <- function(dist = Fixed(0), ..., fixed = FALSE, tolerance = 0.001) { dot_options <- list(...) if (!is(dist, "dist_spec")) { ## could be old syntax if (is.list(dist)) { @@ -226,6 +233,8 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { ## can be removed once dot options are hard deprecated stop("Unknown named arguments passed to `delay_opts`") } + check_stan_delay(dist) + attr(dist, "tolerance") <- tolerance attr(dist, "class") <- c("delay_opts", class(dist)) return(dist) } @@ -239,7 +248,8 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { #' #' @param dist A delay distribution or series of delay distributions reflecting #' the truncation generated using [dist_spec()] or [estimate_truncation()]. -#' Default is an empty call to [dist_spec()], i.e. no truncation +#' Default is fixed distribution with maximum 0, i.e. no truncation +#' @inheritParams apply_tolerance #' @return A `` object summarising the input truncation #' distribution. #' @@ -251,8 +261,8 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { #' trunc_opts() #' #' # truncation dist -#' trunc_opts(dist = dist_spec(mean = 3, sd = 2, max = 10)) -trunc_opts <- function(dist = dist_spec()) { +#' trunc_opts(dist = LogNormal(mean = 3, sd = 2, max = 10)) +trunc_opts <- function(dist = Fixed(0), tolerance = 0.001) { if (!is(dist, "dist_spec")) { if (is.list(dist)) { dist <- do.call(dist_spec, dist) @@ -268,6 +278,8 @@ trunc_opts <- function(dist = dist_spec()) { "`?trunc_opts`" ) } + check_stan_delay(dist) + attr(dist, "tolerance") <- tolerance attr(dist, "class") <- c("trunc_opts", class(dist)) return(dist) } diff --git a/R/report.R b/R/report.R index ee32ea034..a055118b1 100644 --- a/R/report.R +++ b/R/report.R @@ -53,18 +53,6 @@ report_cases <- function(case_estimates, CrIs = c(0.2, 0.5, 0.9)) { samples <- length(unique(case_estimates$sample)) - # define delay distributions - delay_defs <- purrr::map( - seq_along(delays$mean_mean), - ~ EpiNow2::lognorm_dist_def( - mean = delays$mean_mean[.], - mean_sd = delays$mean_sd[.], - sd = delays$mean_mean[.], - sd_sd = delays$mean_sd[.], - max_value = delays$max[.], - samples = samples - ) - ) # add a null reporting effect if missing if (missing(reporting_effect)) { reporting_effect <- data.table::data.table( @@ -91,7 +79,7 @@ report_cases <- function(case_estimates, report <- future.apply::future_lapply(1:max(infections$sample), function(id) { EpiNow2::adjust_infection_to_report(infections[sample == id], - delay_defs = purrr::map(delay_defs, ~ .[id, ]), + delay_defs = delays, type = type, reporting_effect = reporting_effect[sample == id, ]$effect ) @@ -213,7 +201,7 @@ report_summary <- function(summarised_estimates, } if (!is.null(target_folder)) { - saveRDS(summary, paste0(target_folder, "/summary.rds")) + saveRDS(summary, file.path(target_folder, "summary.rds")) } return(summary) } diff --git a/R/simulate_infections.R b/R/simulate_infections.R index f8bcfcde6..c3ec8aaba 100644 --- a/R/simulate_infections.R +++ b/R/simulate_infections.R @@ -11,7 +11,7 @@ #' A previous function called [simulate_infections()] that simulates from a #' given model fit has been renamed [forecast_infections()]. Using #' [simulate_infections()] with existing estimates is now deprecated. This -#' option will be removed in version 2.1.0. +#' option will be removed in version 2.0.0. #' @param R a data frame of reproduction numbers (column `R`) by date (column #' `date`). Column `R` must be numeric and `date` must be in date format. If #' not all days between the first and last day in the `date` are present, @@ -66,8 +66,8 @@ simulate_infections <- function(estimates, R, initial_infections, "simulate_infections(estimates)", "forecast_infections()", details = paste0( - "This `estimates` option will be removed from [simulate_infections()] ", - "in version 2.1.0." + "The `estimates` option will be removed from [simulate_infections()] ", + "in version 2.0.0." ) ) return(forecast_infections(estimates = estimates, ...)) @@ -118,22 +118,17 @@ simulate_infections <- function(estimates, R, initial_infections, trunc = truncation )) - if ((length(data$delay_mean_sd) > 0 && any(data$delay_mean_sd > 0)) || - (length(data$delay_sd_sd) > 0 && any(data$delay_sd_sd > 0))) { + if (length(data$delay_params_sd) > 0 && any(data$delay_params_sd > 0)) { stop( "Cannot simulate from uncertain parameters. Use the [fix_dist()] ", "function to set the parameters of uncertain distributions either the ", "mean or a randomly sampled value" ) } - data$delay_mean <- array( - data$delay_mean_mean, dim = c(1, length(data$delay_mean_mean)) + data$delay_params <- array( + data$delay_params_mean, dim = c(1, length(data$delay_params_mean)) ) - data$delay_sd <- array( - data$delay_sd_mean, dim = c(1, length(data$delay_sd_mean)) - ) - data$delay_mean_sd <- NULL - data$delay_sd_sd <- NULL + data$delay_params_sd <- NULL data <- c(data, create_obs_model( obs, dates = R$date @@ -417,7 +412,7 @@ forecast_infections <- function(estimates, ## allocate empty parameters data <- allocate_empty( - data, c("frac_obs", "delay_mean", "delay_sd", "rep_phi"), + data, c("frac_obs", "delay_params", "rep_phi"), n = data$n ) diff --git a/R/simulate_secondary.R b/R/simulate_secondary.R index 655a08244..ed1aabb58 100644 --- a/R/simulate_secondary.R +++ b/R/simulate_secondary.R @@ -76,22 +76,17 @@ simulate_secondary <- function(primary, trunc = truncation )) - if ((length(data$delay_mean_sd) > 0 && any(data$delay_mean_sd > 0)) || - (length(data$delay_sd_sd) > 0 && any(data$delay_sd_sd > 0))) { + if (length(data$delay_params_sd) > 0 && any(data$delay_params_sd > 0)) { stop( "Cannot simulate from uncertain parameters. Use the [fix_dist()] ", "function to set the parameters of uncertain distributions either the ", "mean or a randomly sampled value" ) } - data$delay_mean <- array( - data$delay_mean_mean, dim = c(1, length(data$delay_mean_mean)) + data$delay_params <- array( + data$delay_params_mean, dim = c(1, length(data$delay_params_mean)) ) - data$delay_sd <- array( - data$delay_sd_mean, dim = c(1, length(data$delay_sd_mean)) - ) - data$delay_mean_sd <- NULL - data$delay_sd_sd <- NULL + data$delay_params_sd <- NULL data <- c(data, create_obs_model( obs, dates = primary$date diff --git a/_pkgdown.yml b/_pkgdown.yml index b7740bb64..75c7313b6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -122,6 +122,10 @@ reference: - title: Define, Fit and Parameterise Distributions desc: Functions to define, fit and parameterise distributions contents: + - Distributions + - apply_tolerance + - collapse + - discretise - contains("dist") - title: Simulate desc: Functions to help with simulating data or mapping to reported cases diff --git a/data-raw/estimate-infections.R b/data-raw/estimate-infections.R index bf442ac34..aa278ef8f 100644 --- a/data-raw/estimate-infections.R +++ b/data-raw/estimate-infections.R @@ -7,12 +7,7 @@ options(mc.cores = 4) reported_cases <- example_confirmed[1:60] #' # use example distributions -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - sd = convert_to_logsd(2, 1), - max = 10, - dist = "lognormal" -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10L) example_estimate_infections <- estimate_infections(reported_cases, generation_time = generation_time_opts(example_generation_time), diff --git a/data-raw/generation-time.R b/data-raw/generation-time.R index 5c03e1c22..3f71e8757 100644 --- a/data-raw/generation-time.R +++ b/data-raw/generation-time.R @@ -10,12 +10,9 @@ library(here) ## Load raw MCMC output gi <- setDT(readRDS(here("data-raw", "gi.rds"))) ## Check mean and standard deviation -example_generation_time <- dist_spec( - mean = median(gi$mean), - mean_sd = sd(gi$mean), - sd = median(gi$sd), - sd_sd = sd(gi$sd), - dist = "gamma", +example_generation_time <- Gamma( + mean = Normal(median(gi$mean), sd(gi$mean)), + sd = Normal(median(gi$sd), sd(gi$sd)), max = 14L ) diff --git a/data-raw/incubation-period.R b/data-raw/incubation-period.R index fe25845b0..97758ef32 100644 --- a/data-raw/incubation-period.R +++ b/data-raw/incubation-period.R @@ -3,12 +3,9 @@ library(EpiNow2) ## COVID-19 incubation period from Lauer et al., ## https://doi.org/10.7326/M20-0504 -example_incubation_period <- dist_spec( - mean = 1.621, - mean_sd = 0.0640, - sd = 0.418, - sd_sd = 0.0691, - dist = "lognormal", +example_incubation_period <- LogNormal( + meanlog = Normal(1.621, 0.0640), + sdlog = Normal(0.418, 0.0691), max = 14L ) diff --git a/data-raw/reporting-delay.R b/data-raw/reporting-delay.R index 05503f3d1..e2cc12d33 100644 --- a/data-raw/reporting-delay.R +++ b/data-raw/reporting-delay.R @@ -2,11 +2,8 @@ library(EpiNow2) ## example reporting delay -example_reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - sd = convert_to_logsd(2, 1), - max = 10, - dist = "lognormal" +example_reporting_delay <- LogNormal( + meanlog = Normal(0.6, 0.06), sdlog = Normal(0.5, 0.05), max = 10L ) usethis::use_data(example_reporting_delay, overwrite = TRUE) diff --git a/data-raw/truncated.R b/data-raw/truncated.R index 03805d1cb..30c1e7356 100644 --- a/data-raw/truncated.R +++ b/data-raw/truncated.R @@ -11,19 +11,25 @@ library("EpiNow2") #' @keywords internal apply_truncation <- function(index, data, dist) { set.seed(index) - if (dist$dist == 0) { + if (dist[[1]]$distribution == 0) { dfunc <- dlnorm } else { dfunc <- dgamma } cmf <- cumsum( dfunc( - 1:(dist$max + 1), - rnorm(1, dist$mean_mean, dist$mean_sd), - rnorm(1, dist$sd_mean, dist$sd_sd) + seq_len(max(dist) + 1), + rnorm(1, + dist[[1]]$parameters$meanlog[[1]]$parameters$mean, + dist[[1]]$parameters$meanlog[[1]]$parameters$sd + ), + rnorm(1, + dist[[1]]$parameters$sdlog[[1]]$parameters$mean, + dist[[1]]$parameters$sdlog[[1]]$parameters$sd + ) ) ) - cmf <- cmf / cmf[dist$max + 1] + cmf <- cmf / cmf[max(dist) + 1] cmf <- rev(cmf)[-1] trunc_data <- data.table::copy(data)[1:(.N - index)] trunc_data[ @@ -36,11 +42,9 @@ apply_truncation <- function(index, data, dist) { reported_cases <- example_confirmed[1:60] # define example truncation distribution (note not integer adjusted) -trunc_dist <- dist_spec( - mean = convert_to_logmean(3, 2), - mean_sd = 0.1, - sd = convert_to_logsd(3, 2), - sd_sd = 0.1, +trunc <- LogNormal( + meanlog = Normal(0.9, 0.1), + sdlog = Normal(0.6, 0.1), max = 10 ) @@ -50,7 +54,7 @@ example_truncated <- purrr::map( seq(20, 0, -5), apply_truncation, data = reported_cases, - dist = trunc_dist + dist = trunc ) usethis::use_data( diff --git a/data/example_generation_time.rda b/data/example_generation_time.rda index 58105081f..21dae5351 100644 Binary files a/data/example_generation_time.rda and b/data/example_generation_time.rda differ diff --git a/data/example_incubation_period.rda b/data/example_incubation_period.rda index 60dd06d26..d3ba29200 100644 Binary files a/data/example_incubation_period.rda and b/data/example_incubation_period.rda differ diff --git a/data/example_reporting_delay.rda b/data/example_reporting_delay.rda index 3e0b386b1..894b3d933 100644 Binary files a/data/example_reporting_delay.rda and b/data/example_reporting_delay.rda differ diff --git a/data/example_truncated.rda b/data/example_truncated.rda index 2a0a9319b..7729481bb 100644 Binary files a/data/example_truncated.rda and b/data/example_truncated.rda differ diff --git a/inst/dev/design_dist.md b/inst/dev/design_dist.md new file mode 100644 index 000000000..00bbdb67f --- /dev/null +++ b/inst/dev/design_dist.md @@ -0,0 +1,33 @@ +# Design considerations of the distribution interface + +We are aiming for an interface with the following properties + * it should be clear to users which probability distribution is being used + * it can represent discrete and continuous parameters + * distribution parameters are labelled according to their meaning (e.g. mean vs. meanlog) + +We need to be able to represent + * (especially left-)truncated and untruncated distributions + * nested distributions (parameters of one distribution can be distributed according to another distribution) + +Ideally we'd also be able to represent + * parametric and nonparametric distributions + * distributions that are convolutions of two specified distributions + +This interface could be of potential use to other packages (e.g. `epinowcast`, `epidist`, `epiparameter`). It should therefore stand on its own. Any `EpiNow2` specific functionality (especially relating to conversion to stan code) should not interfere with core functionality of the distribution interface. + +## Proposed interface + +We are an proposing an interface similar to what is used in `{rstanarm}` and `{distributions3}`, where probability distributions are called according to their capitalised names. This avoids masking `base::gamma()` (although we still mask `stats::Gamma`). There is potential for integration especially with `{distributionS3}`. Parameters are nested according to the same syntax. + +Examples: +```{r} +Gamma(mean = 3, sd = 1) +Gamma(mean = 3, sd = 1, max = 14) +Gamma(mean = Normal(mean = 3, sd = 1), sd = 4, max = 14) +Gamma(shape = Normal(mean = 3, sd = 1), rate = Normal(mean = 2, sd = 0.5), max = 14) +Gamma(shape = 2, rate = 1, max = 14) +LogNormal(meanlog = 1.5, sdlog = 1) +LogNormal(mean = Normal(1.5, 0.1), sd = Normal(1, 0.1)) +NonParametric(c(0.2, 0.4, 0.4)) +NonParametric(c(0.2, 0.4, 0.2, 0.05)) +``` diff --git a/inst/stan/data/delays.stan b/inst/stan/data/delays.stan index 0c624ffbd..55036454b 100644 --- a/inst/stan/data/delays.stan +++ b/inst/stan/data/delays.stan @@ -1,17 +1,20 @@ int delay_n; // number of delay distributions int delay_n_p; // number of parametric delay distributions int delay_n_np; // number of nonparametric delay distributions - array[delay_n_p] real delay_mean_mean; // prior mean of mean delay distribution - array[delay_n_p] real delay_mean_sd; // prior sd of mean delay distribution - array[delay_n_p] real delay_sd_mean; // prior sd of sd of delay distribution - array[delay_n_p] real delay_sd_sd; // prior sd of sd of delay distribution array[delay_n_p] int delay_max; // maximum delay distribution array[delay_n_p] int delay_dist; // 0 = lognormal; 1 = gamma + int delay_np_pmf_length; // number of nonparametric pmf elements vector[delay_np_pmf_length] delay_np_pmf; // ragged array of fixed PMFs array[delay_n_np + 1] int delay_np_pmf_groups; // links to ragged array - array[delay_n_p] int delay_weight; + int delay_params_length; // number of parameters across all parametric delay distributions + vector[delay_params_length] delay_params_lower; // ragged array of lower bounds of the parameters + vector[delay_params_length] delay_params_mean; // ragged array of mean parameters for parametric delay distributions + vector[delay_params_length] delay_params_sd; // ragged array of sd of parameters for parametric delay distributions + array[delay_n_p + 1] int delay_params_groups; // links to ragged array + + array[delay_n_p] int delay_weight; // delay weights int delay_types; // number of delay types array[delay_n] int delay_types_p; // whether delay types are parametric array[delay_n] int delay_types_id; // whether delay types are parametric diff --git a/inst/stan/data/simulation_delays.stan b/inst/stan/data/simulation_delays.stan index 0ceeedcaa..bbac5d25a 100644 --- a/inst/stan/data/simulation_delays.stan +++ b/inst/stan/data/simulation_delays.stan @@ -1,8 +1,6 @@ int delay_n; // number of delay distribution distributions int delay_n_p; // number of parametric delay distributions int delay_n_np; // number of nonparametric delay distributions - array[n, delay_n_p] real delay_mean; // prior mean of mean delay distribution - array[n, delay_n_p] real delay_sd; // prior sd of sd of delay distribution array[delay_n_p] int delay_max; // maximum delay distribution array[delay_n_p] int delay_dist; // 0 = lognormal; 1 = gamma int delay_np_pmf_length; // number of nonparametric pmf elements @@ -10,6 +8,10 @@ array[delay_n_np + 1] int delay_np_pmf_groups; // links to ragged array array[delay_n_p] int delay_weight; + int delay_params_length; // number of parameters across all parametric delay distributions + array[n] vector[delay_params_length] delay_params; // ragged array of mean parameters for parametric delay distributions + array[delay_n_p + 1] int delay_params_groups; // links to ragged array + int delay_types; // number of delay types array[delay_n] int delay_types_p; // whether delay types are parametric array[delay_n] int delay_types_id; // whether delay types are parametric diff --git a/inst/stan/estimate_infections.stan b/inst/stan/estimate_infections.stan index e1d4d0159..0b5808d78 100644 --- a/inst/stan/estimate_infections.stan +++ b/inst/stan/estimate_infections.stan @@ -48,8 +48,8 @@ parameters{ array[bp_n > 0 ? 1 : 0] real bp_sd; // standard deviation of breakpoint effect array[bp_n] real bp_effects; // Rt breakpoint effects // observation model - array[delay_n_p] real delay_mean; // mean of delays - array[delay_n_p] real delay_sd; // sd of delays + + vector[delay_params_length] delay_params; // delay parameters simplex[week_effect] day_of_week_simplex;// day of week reporting effect array[obs_scale_sd > 0 ? 1 : 0] real frac_obs; // fraction of cases that are ultimately observed array[model_type] real rep_phi; // overdispersion of the reporting process @@ -71,7 +71,7 @@ transformed parameters { gt_rev_pmf = get_delay_rev_pmf( gt_id, delay_type_max[gt_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 1, 1, 0 ); R = update_Rt( @@ -92,7 +92,7 @@ transformed parameters { vector[delay_type_max[delay_id] + 1] delay_rev_pmf = get_delay_rev_pmf( delay_id, delay_type_max[delay_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 0, 1, 0 ); reports = convolve_to_report(infections, delay_rev_pmf, seeding_time); @@ -112,7 +112,7 @@ transformed parameters { vector[delay_type_max[trunc_id] + 1] trunc_rev_cmf = get_delay_rev_pmf( trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 0, 1, 1 ); obs_reports = truncate(reports[1:ot], trunc_rev_cmf, 0); @@ -130,8 +130,7 @@ model { } // penalised priors for delay distributions delays_lp( - delay_mean, delay_mean_mean, - delay_mean_sd, delay_sd, delay_sd_mean, delay_sd_sd, + delay_params, delay_params_mean, delay_params_sd, delay_params_groups, delay_dist, delay_weight ); if (estimate_r) { @@ -168,14 +167,13 @@ generated quantities { r = R_to_growth(R, gt_mean, gt_var); } else { // sample generation time - array[delay_n_p] real delay_mean_sample = - normal_rng(delay_mean_mean, delay_mean_sd); - array[delay_n_p] real delay_sd_sample = - normal_rng(delay_sd_mean, delay_sd_sd); + vector[delay_params_length] delay_params_sample = to_vector(normal_lb_rng( + delay_params_mean, delay_params_sd, delay_params_lower + )); vector[delay_type_max[gt_id] + 1] sampled_gt_rev_pmf = get_delay_rev_pmf( gt_id, delay_type_max[gt_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean_sample, delay_sd_sample, + delay_np_pmf_groups, delay_params_sample, delay_params_groups, delay_dist, 1, 1, 0 ); gt_mean = rev_pmf_mean(sampled_gt_rev_pmf, 1); diff --git a/inst/stan/estimate_secondary.stan b/inst/stan/estimate_secondary.stan index ff2aad7d4..70fcc8d4a 100644 --- a/inst/stan/estimate_secondary.stan +++ b/inst/stan/estimate_secondary.stan @@ -27,8 +27,7 @@ transformed data{ parameters{ // observation model - array[delay_n_p] real delay_mean; - array[delay_n_p] real delay_sd; // sd of delays + vector[delay_params_length] delay_params; simplex[week_effect] day_of_week_simplex; // day of week reporting effect array[obs_scale] real frac_obs; // fraction of cases that are ultimately observed array[model_type] real rep_phi; // overdispersion of the reporting process @@ -53,7 +52,7 @@ transformed parameters { vector[delay_type_max[delay_id] + 1] delay_rev_pmf = get_delay_rev_pmf( delay_id, delay_type_max[delay_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 0, 1, 0 ); convolved = convolved + convolve_to_report(scaled, delay_rev_pmf, 0); @@ -76,7 +75,7 @@ transformed parameters { vector[delay_type_max[trunc_id]] trunc_rev_cmf = get_delay_rev_pmf( trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 0, 1, 1 ); secondary = truncate(secondary, trunc_rev_cmf, 0); @@ -86,10 +85,10 @@ transformed parameters { model { // penalised priors for delay distributions delays_lp( - delay_mean, delay_mean_mean, delay_mean_sd, delay_sd, delay_sd_mean, - delay_sd_sd, delay_dist, delay_weight + delay_params, delay_params_mean, delay_params_sd, delay_params_groups, + delay_dist, delay_weight ); - + // prior primary report scaling if (obs_scale) { frac_obs[1] ~ normal(obs_scale_mean, obs_scale_sd) T[0, 1]; diff --git a/inst/stan/estimate_truncation.stan b/inst/stan/estimate_truncation.stan index b5ffd673b..3432c094f 100644 --- a/inst/stan/estimate_truncation.stan +++ b/inst/stan/estimate_truncation.stan @@ -28,8 +28,7 @@ transformed data{ } } parameters { - array[delay_n_p] real delay_mean; - array[delay_n_p] real delay_sd; // sd of delays + vector[delay_params_length] delay_params; real phi; real sigma; } @@ -41,7 +40,7 @@ transformed parameters{ vector[delay_type_max[trunc_id] + 1] trunc_rev_cmf = get_delay_rev_pmf( trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + delay_np_pmf_groups, delay_params, delay_params_groups, delay_dist, 0, 1, 1 ); { @@ -60,10 +59,10 @@ transformed parameters{ model { // priors for the log normal truncation distribution delays_lp( - delay_mean, delay_mean_mean, delay_mean_sd, delay_sd, delay_sd_mean, - delay_sd_sd, delay_dist, delay_weight + delay_params, delay_params_mean, delay_params_sd, delay_params_groups, + delay_dist, delay_weight ); - + phi ~ normal(0, 1) T[0,]; sigma ~ normal(0, 1) T[0,]; diff --git a/inst/stan/functions/delays.stan b/inst/stan/functions/delays.stan index 203594efa..dcc17f1b0 100644 --- a/inst/stan/functions/delays.stan +++ b/inst/stan/functions/delays.stan @@ -21,7 +21,7 @@ vector get_delay_rev_pmf( int delay_id, int len, array[] int delay_types_p, array[] int delay_types_id, array[] int delay_types_groups, array[] int delay_max, vector delay_np_pmf, array[] int delay_np_pmf_groups, - array[] real delay_mean, array[] real delay_sigma, array[] int delay_dist, + vector delay_params, array[] int delay_params_groups, array[] int delay_dist, int left_truncate, int reverse_pmf, int cumulative ) { // loop over delays @@ -30,10 +30,11 @@ vector get_delay_rev_pmf( int new_len; for (i in delay_types_groups[delay_id]:(delay_types_groups[delay_id + 1] - 1)) { if (delay_types_p[i]) { // parametric + int start = delay_params_groups[delay_types_id[i]]; + int end = delay_params_groups[delay_types_id[i] + 1] - 1; vector[delay_max[delay_types_id[i]] + 1] new_variable_pmf = discretised_pmf( - delay_mean[delay_types_id[i]], - delay_sigma[delay_types_id[i]], + delay_params[start:end], delay_max[delay_types_id[i]] + 1, delay_dist[delay_types_id[i]] ); @@ -75,30 +76,41 @@ vector get_delay_rev_pmf( } -void delays_lp(array[] real delay_mean, array[] real delay_mean_mean, array[] real delay_mean_sd, - array[] real delay_sd, array[] real delay_sd_mean, array[] real delay_sd_sd, +void delays_lp(vector delay_params, + vector delay_params_mean, vector delay_params_sd, + array[] int delay_params_groups, array[] int delay_dist, array[] int weight) { - int mean_delays = num_elements(delay_mean); - int sd_delays = num_elements(delay_sd); - if (mean_delays) { - for (s in 1:mean_delays) { - if (delay_mean_sd[s] > 0) { - // uncertain mean - target += normal_lpdf(delay_mean[s] | delay_mean_mean[s], delay_mean_sd[s]) * weight[s]; - // if a distribution with postive support only truncate the prior - if (delay_dist[s]) { - target += -normal_lccdf(0 | delay_mean_mean[s], delay_mean_sd[s]) * weight[s]; - } + int n_delays = num_elements(delay_params_groups) - 1; + if (n_delays == 0) { + return; + } + for (d in 1:n_delays) { + int start = delay_params_groups[d]; + int end = delay_params_groups[d + 1] - 1; + for (s in start:end) { + if (delay_params_sd[s] > 0) { + // uncertain mean + target += normal_lpdf( + delay_params[s] | delay_params_mean[s], delay_params_sd[s] + ) * weight[d]; + // if a distribution with postive support only truncate the prior + if (delay_dist[d] == 1) { + target += -normal_lccdf( + 0 | delay_params_mean[s], delay_params_sd[s] + ) * weight[d]; } } } - if (sd_delays) { - for (s in 1:sd_delays) { - if (delay_sd_sd[s] > 0) { - // uncertain sd - target += normal_lpdf(delay_sd[s] | delay_sd_mean[s], delay_sd_sd[s]) * weight[s]; - target += -normal_lccdf(0 | delay_sd_mean[s], delay_sd_sd[s]) * weight[s]; - } - } } } + +vector normal_lb_rng(vector mu, vector sigma, vector lb) { + int len = num_elements(mu); + vector[len] ret; + for (i in 1:len) { + real p = normal_cdf(lb[i] | mu[i], sigma[i]); // cdf for bounds + real u = uniform_rng(p, 1); + ret[i] = (sigma[i] * inv_Phi(u)) + mu[i]; // inverse cdf for value + } + return ret; +} diff --git a/inst/stan/functions/pmfs.stan b/inst/stan/functions/pmfs.stan index 103ef7b49..a22d7fd8e 100644 --- a/inst/stan/functions/pmfs.stan +++ b/inst/stan/functions/pmfs.stan @@ -1,35 +1,26 @@ // Calculate the daily probability of reporting using parametric // distributions up to the maximum observed delay. -// If sigma is 0 all the probability mass is put on n. // Adapted from https://github.com/epiforecasts/epinowcast // (MIT License, copyright: epinowcast authors) -vector discretised_pmf(real mu, real sigma, int n, int dist) { +vector discretised_pmf(vector params, int n, int dist) { vector[n] lpmf; - if (sigma > 0) { - vector[n] upper_lcdf; - if (dist == 0) { - for (i in 1:n) { - upper_lcdf[i] = lognormal_lcdf(i | mu, sigma); - } - } else if (dist == 1) { - real alpha = mu^2 / sigma^2; - real beta = mu / sigma^2; - for (i in 1:n) { - upper_lcdf[i] = gamma_lcdf(i | alpha, beta); - } - } else { - reject("Unknown distribution function provided."); + vector[n] upper_lcdf; + if (dist == 0) { + for (i in 1:n) { + upper_lcdf[i] = lognormal_lcdf(i | params[1], params[2]); + } + } else if (dist == 1) { + for (i in 1:n) { + upper_lcdf[i] = gamma_lcdf(i | params[1], params[2]); } - // discretise - lpmf[1] = upper_lcdf[1]; - lpmf[2:n] = log_diff_exp(upper_lcdf[2:n], upper_lcdf[1:(n-1)]); - // normalize - lpmf = lpmf - upper_lcdf[n]; } else { - // delta function - lpmf = rep_vector(negative_infinity(), n); - lpmf[n] = 0; + reject("Unknown distribution function provided."); } + // discretise + lpmf[1] = upper_lcdf[1]; + lpmf[2:n] = log_diff_exp(upper_lcdf[2:n], upper_lcdf[1:(n-1)]); + // normalize + lpmf = lpmf - upper_lcdf[n]; return(exp(lpmf)); } diff --git a/inst/stan/simulate_infections.stan b/inst/stan/simulate_infections.stan index 815b8e8e9..d6ff12128 100644 --- a/inst/stan/simulate_infections.stan +++ b/inst/stan/simulate_infections.stan @@ -42,7 +42,7 @@ generated quantities { gt_rev_pmf = get_delay_rev_pmf( gt_id, delay_type_max[gt_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + delay_np_pmf_groups, delay_params[i], delay_params_groups, delay_dist, 1, 1, 0 ); @@ -55,7 +55,7 @@ generated quantities { vector[delay_type_max[delay_id] + 1] delay_rev_pmf = get_delay_rev_pmf( delay_id, delay_type_max[delay_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + delay_np_pmf_groups, delay_params[i], delay_params_groups, delay_dist, 0, 1, 0 ); // convolve from latent infections to mean of observations @@ -79,7 +79,7 @@ generated quantities { vector[delay_type_max[trunc_id] + 1] trunc_rev_cmf = get_delay_rev_pmf( trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + delay_np_pmf_groups, delay_params[i], delay_params_groups, delay_dist, 0, 1, 1 ); reports[i] = to_row_vector(truncate( diff --git a/inst/stan/simulate_secondary.stan b/inst/stan/simulate_secondary.stan index 7c1a4e965..d59f1d484 100644 --- a/inst/stan/simulate_secondary.stan +++ b/inst/stan/simulate_secondary.stan @@ -46,7 +46,7 @@ generated quantities { vector[delay_type_max[delay_id] + 1] delay_rev_pmf = get_delay_rev_pmf( delay_id, delay_type_max[delay_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + delay_np_pmf_groups, delay_params[i], delay_params_groups, delay_dist, 0, 1, 0 ); convolved = convolved + convolve_to_report(scaled, delay_rev_pmf, 0); @@ -70,7 +70,7 @@ generated quantities { vector[delay_type_max[trunc_id] + 1] trunc_rev_cmf = get_delay_rev_pmf( trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id, delay_types_groups, delay_max, delay_np_pmf, - delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + delay_np_pmf_groups, delay_params[i], delay_params_groups, delay_dist, 0, 1, 1 ); secondary = truncate( diff --git a/man/Distributions.Rd b/man/Distributions.Rd new file mode 100644 index 000000000..52f93b06c --- /dev/null +++ b/man/Distributions.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{Distributions} +\alias{Distributions} +\alias{LogNormal} +\alias{Gamma} +\alias{Normal} +\alias{Fixed} +\alias{NonParametric} +\title{Probability distributions} +\usage{ +LogNormal(meanlog, sdlog, mean, sd, max = Inf) + +Gamma(shape, rate, scale, mean, sd, max = Inf) + +Normal(mean, sd, max = Inf) + +Fixed(value, max = Inf) + +NonParametric(pmf) +} +\arguments{ +\item{meanlog, sdlog}{mean and standard deviation of the distribution + on the log scale with default values of \code{0} and \code{1} respectively.} + +\item{mean, sd}{mean and standard deviation of the distribution} + +\item{max}{Numeric, maximum value of the distribution. The distribution will +be truncated at this value. Default: \code{Inf}, i.e. no maximum.} + +\item{shape, scale}{shape and scale parameters. Must be positive, + \code{scale} strictly.} + +\item{rate}{an alternative way to specify the scale.} + +\item{value}{Value of the fixed (delta) distribution} + +\item{pmf}{Probability mass of the given distribution; this is +passed as a zero-indexed numeric vector (i.e. the fist entry represents +the probability mass of zero). If not summing to one it will be normalised +to sum to one internally.} +} +\value{ +A \code{dist_spec} representing a distribution of the given +specification. +} +\description{ +Probability distributions + +Generates a nonparametric distribution. +} +\details{ +Probability distributions are ubiquitous in EpiNow2, usually representing +epidemiological delays (e.g., the generation time for delays between +becoming infecting and infecting others; or reporting delays) + +They are generated using functions that have a name corresponding to the +probability distribution that is being used. They generated \code{dist_spec} +objects that are then passed to the models underlying EpiNow2. +All parameters can be given either as fixed values (a numeric value) or as +uncertain values (a \code{dist_sepc}). If given as uncertain values, currently +only normally distributed parameters (generated using \code{Normal()}) are +supported. + +Each distribution has a representation in terms of "natural" parameters (the +ones used in stan) but can sometimes also be specified using other +parameters such as the mean or standard deviation of the distribution. If +not given as natural parameters then these will be calculated from the given +parameters. If they have uncertainty, this will be done by random sampling +from the given uncertainty and converting resulting parameters to their +natural representation. + +Currently available distributions are lognormal, gamma, normal, fixed +(delta) and nonparametric. The nonparametric is a special case where the +probability mass function is given directly as a numeric vector. +} +\examples{ +LogNormal(mean = 4, sd = 1) +LogNormal(mean = 4, sd = 1, max = 10) +LogNormal(mean = Normal(4, 1), sd = 1, max = 10) +Gamma(mean = 4, sd = 1) +Gamma(shape = 16, rate = 4) +Gamma(shape = Normal(16, 2), rate = Normal(4, 1)) +Gamma(shape = Normal(16, 2), scale = Normal(1/4, 1)) +Normal(mean = 4, sd = 1) +Normal(mean = 4, sd = 1, max = 10) +Fixed(value = 3) +Fixed(value = 3.5) +NonParametric(c(0.1, 0.3, 0.2, 0.4)) +NonParametric(c(0.1, 0.3, 0.2, 0.1, 0.1)) +} diff --git a/man/apply_tolerance.Rd b/man/apply_tolerance.Rd new file mode 100644 index 000000000..c74e3623c --- /dev/null +++ b/man/apply_tolerance.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{apply_tolerance} +\alias{apply_tolerance} +\title{Applies a threshold to all nonparametric distributions in a } +\usage{ +apply_tolerance(x, tolerance) +} +\arguments{ +\item{x}{A \verb{}} + +\item{tolerance}{Numeric; the desired tolerance level.} +} +\value{ +A \verb{} where probability masses below the threshold level +have been removed +} +\description{ +This removes any part of the tail of the nonparametric distributions in the + where the probability mass is below the threshold level. +} +\examples{ +dist <- discretise(Gamma(mean = 5, sd = 1, max = 20)) +apply_tolerance(dist, 0.01) +} diff --git a/man/bootstrapped_dist_fit.Rd b/man/bootstrapped_dist_fit.Rd index ed0f28e54..a75d50ffc 100644 --- a/man/bootstrapped_dist_fit.Rd +++ b/man/bootstrapped_dist_fit.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/estimate_delay.R \name{bootstrapped_dist_fit} \alias{bootstrapped_dist_fit} \title{Fit a Subsampled Bootstrap to Integer Values and Summarise Distribution diff --git a/man/c.dist_spec.Rd b/man/c.dist_spec.Rd index 15bf25be1..7a9bfed87 100644 --- a/man/c.dist_spec.Rd +++ b/man/c.dist_spec.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{c.dist_spec} \alias{c.dist_spec} \title{Combines multiple delay distributions for further processing} @@ -16,3 +16,16 @@ Combined delay distributions (with class \verb{}) This combines the parameters so that they can be fed as multiple delay distributions to \code{\link[=epinow]{epinow()}} or \code{\link[=estimate_infections]{estimate_infections()}}. } +\examples{ +# A fixed lognormal distribution with mean 5 and sd 1. +dist1 <- LogNormal( + meanlog = 1.6, sdlog = 1, max = 20 +) +dist1 + dist1 + +# An uncertain gamma distribution with mean 3 and sd 2 +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +) +c(dist1, dist2) +} diff --git a/man/check_stan_delay.Rd b/man/check_stan_delay.Rd new file mode 100644 index 000000000..fe33fa312 --- /dev/null +++ b/man/check_stan_delay.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_stan_delay} +\alias{check_stan_delay} +\title{Validate probability distribution for passing to stan} +\usage{ +check_stan_delay(dist) +} +\arguments{ +\item{dist}{A \code{dist_spec} object.`} +} +\value{ +Called for its side effects. +} +\description{ +\code{check_stan_delay()} checks that the supplied data is a \verb{}, +that it is lognormal or gamma, and that it has a finite maximum. +} +\keyword{internal} diff --git a/man/collapse.Rd b/man/collapse.Rd new file mode 100644 index 000000000..26424f862 --- /dev/null +++ b/man/collapse.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{collapse} +\alias{collapse} +\title{Collapse nonparametric distributions in a } +\usage{ +collapse(x) +} +\arguments{ +\item{x}{A \verb{}} +} +\value{ +A \verb{} where consecutive nonparametric distributions +have been convolved +} +\description{ +This convolves any consecutive nonparametric distributions contained +in the . +} +\examples{ +# A fixed gamma distribution with mean 5 and sd 1. +dist1 <- Gamma(mean = 5, sd = 1, max = 20) + +# An uncertain lognormal distribution with mean 3 and sd 2 +dist2 <- LogNormal(mean = 3, sd = 2, max = 20) + +# The maxf the sum of two distributions +collapse(discretise(dist1 + dist2)) +} diff --git a/man/convert_to_natural.Rd b/man/convert_to_natural.Rd new file mode 100644 index 000000000..ec1596efc --- /dev/null +++ b/man/convert_to_natural.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{convert_to_natural} +\alias{convert_to_natural} +\title{Internal function for converting parameters to natural parameters.} +\usage{ +convert_to_natural(params, distribution) +} +\arguments{ +\item{params}{A numerical named parameter vector} + +\item{distribution}{Character; the distribution to use.} +} +\value{ +A list with two elements, \code{params_mean} and \code{params_sd}, containing +mean and sd of natural parameters. +} +\description{ +This is used for preprocessing before generating a \code{dist_spec} object +from a given set of parameters and distribution +} +\examples{ +\dontrun{ +convert_to_natural( + params = list(mean = 2, sd = 1, max = Inf), + distribution = "gamma" +) +} +} +\keyword{internal} diff --git a/man/convolve_and_scale.Rd b/man/convolve_and_scale.Rd index 1d1a26ab0..ec8e74a16 100644 --- a/man/convolve_and_scale.Rd +++ b/man/convolve_and_scale.Rd @@ -90,8 +90,3 @@ cases \seealso{ estimate_secondary } -\author{ -Sam Abbott - -Sebastian Funk -} diff --git a/man/create_delay_inits.Rd b/man/create_delay_inits.Rd new file mode 100644 index 000000000..8ba670c90 --- /dev/null +++ b/man/create_delay_inits.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create.R +\name{create_delay_inits} +\alias{create_delay_inits} +\title{Create initial conditions for delays} +\usage{ +create_delay_inits(data) +} +\arguments{ +\item{data}{A list of data as produced by \code{\link[=create_stan_data]{create_stan_data()}}.} +} +\value{ +A list of initial conditions for delays +} +\description{ +Create initial conditions for delays +} +\keyword{internal} diff --git a/man/create_stan_delays.Rd b/man/create_stan_delays.Rd index 79fc44e8f..ceec9a27d 100644 --- a/man/create_stan_delays.Rd +++ b/man/create_stan_delays.Rd @@ -7,8 +7,7 @@ create_stan_delays(..., weight = 1) } \arguments{ -\item{...}{Named delay distributions specified using \code{dist_spec()}. -The names are assigned to IDs} +\item{...}{Named delay distributions. The names are assigned to IDs} \item{weight}{Numeric, weight associated with delay priors; default: 1} } diff --git a/man/delay_opts.Rd b/man/delay_opts.Rd index 02699c09c..46ffb13a6 100644 --- a/man/delay_opts.Rd +++ b/man/delay_opts.Rd @@ -4,15 +4,17 @@ \alias{delay_opts} \title{Delay Distribution Options} \usage{ -delay_opts(dist = dist_spec(), ..., fixed = FALSE) +delay_opts(dist = Fixed(0), ..., fixed = FALSE, tolerance = 0.001) } \arguments{ -\item{dist}{A delay distribution or series of delay distributions generated -using \code{\link[=dist_spec]{dist_spec()}}. Default is an empty call to \code{\link[=dist_spec]{dist_spec()}}, i.e. no delay} +\item{dist}{A delay distribution or series of delay distributions. Default is +a fixed distribution with all mass at 0, i.e. no delay.} \item{...}{deprecated; use \code{dist} instead} \item{fixed}{deprecated; use \code{dist} instead} + +\item{tolerance}{Numeric; the desired tolerance level.} } \value{ A \verb{} object summarising the input delay distributions. @@ -27,11 +29,11 @@ functions. delay_opts() # A single delay that has uncertainty -delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 14) +delay <- LogNormal(mean = Normal(1, 0.2), sd = Normal(0.5, 0.1), max = 14) delay_opts(delay) # A single delay without uncertainty -delay <- dist_spec(mean = 1, sd = 0.5, max = 14) +delay <- LogNormal(meanlog = 1, sdlog = 0.5, max = 14) delay_opts(delay) # Multiple delays (in this case twice the same) diff --git a/man/discretise.Rd b/man/discretise.Rd new file mode 100644 index 000000000..f6163d29a --- /dev/null +++ b/man/discretise.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{discretise} +\alias{discretise} +\alias{discretize} +\title{Discretise a } +\usage{ +discretise(x, silent = TRUE) + +discretize(x, silent = TRUE) +} +\arguments{ +\item{x}{A \verb{}} + +\item{silent}{Logical; if \code{TRUE} then any distribution that can't be +discretised will be returned as is. If \code{FALSE} then an error will be +thrown.} +} +\value{ +A \verb{} where all distributions with constant parameters are +nonparametric. +} +\description{ +Discretise a +} +\details{ +By default it will discretise all the distributions it can discretise +(i.e. those with finite support and constant parameters). +} +\examples{ +# A fixed gamma distribution with mean 5 and sd 1. +dist1 <- Gamma(mean = 5, sd = 1, max = 20) + +# An uncertain lognormal distribution with mean 3 and sd 2 +dist2 <- LogNormal(mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20) + +# The maxf the sum of two distributions +discretise(dist1 + dist2) +} diff --git a/man/dist_fit.Rd b/man/dist_fit.Rd index df32dfb4c..fb26967c8 100644 --- a/man/dist_fit.Rd +++ b/man/dist_fit.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/estimate_delay.R \name{dist_fit} \alias{dist_fit} \title{Fit an Integer Adjusted Exponential, Gamma or Lognormal distributions} diff --git a/man/dist_skel.Rd b/man/dist_skel.Rd index 7df4625cd..84cf2f607 100644 --- a/man/dist_skel.Rd +++ b/man/dist_skel.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{dist_skel} \alias{dist_skel} \title{Distribution Skeleton} @@ -68,33 +68,35 @@ dist_skel(1:10, ## Gamma model # sample -dist_skel(10, model = "gamma", params = list(shape = 1, scale = 2)) +dist_skel(10, model = "gamma", params = list(shape = 1, rate = 0.5)) # cumulative prob density dist_skel(0:10, model = "gamma", dist = TRUE, - params = list(shape = 1, scale = 2) + params = list(shape = 1, rate = 0.5) ) # probability density dist_skel(0:10, model = "gamma", dist = TRUE, - cum = FALSE, params = list(shape = 2, scale = 2) + cum = FALSE, params = list(shape = 2, rate = 0.5) ) ## Log normal model # sample -dist_skel(10, model = "lognormal", params = list(mean = log(5), sd = log(2))) +dist_skel(10, + model = "lognormal", params = list(meanlog = log(5), sdlog = log(2)) +) # cumulative prob density dist_skel(0:10, model = "lognormal", dist = TRUE, - params = list(mean = log(5), sd = log(2)) + params = list(meanlog = log(5), sdlog = log(2)) ) # probability density dist_skel(0:10, model = "lognormal", dist = TRUE, cum = FALSE, - params = list(mean = log(5), sd = log(2)) + params = list(meanlog = log(5), sdlog = log(2)) ) } diff --git a/man/dist_spec.Rd b/man/dist_spec.Rd index a853ba54b..a2086b5f6 100644 --- a/man/dist_spec.Rd +++ b/man/dist_spec.Rd @@ -1,93 +1,59 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/deprecated.R \name{dist_spec} \alias{dist_spec} \title{Specify a distribution.} \usage{ dist_spec( + distribution = c("lognormal", "normal", "gamma", "fixed", "empty"), + params_mean = numeric(0), + params_sd = numeric(0), mean, sd = 0, mean_sd = 0, sd_sd = 0, - distribution = c("lognormal", "gamma"), - max, + max = Inf, pmf = numeric(0), fixed = FALSE ) } \arguments{ -\item{mean}{Numeric. If the only non-zero summary parameter -then this is the fixed interval of the distribution. If the \code{sd} is -non-zero then this is the mean of the distribution given by \code{dist}. -If this is not given a vector of empty vectors is returned.} +\item{distribution}{Character, defaults to "lognormal". The (discretised) +distribution to be used. Can be "lognormal", "gamma", "normal" or "fixed". +The corresponding parameters (defined in \code{\link[=natural_params]{natural_params()}}) are passed +as \code{params_mean}, and their uncertainty as \code{params_sd}.} -\item{sd}{Numeric, defaults to 0. Sets the standard deviation of the -distribution.} +\item{params_mean}{Numeric. Central values of the parameters of the +distribution as defined in [natural_params().} -\item{mean_sd}{Numeric, defaults to 0. Sets the standard deviation of the -uncertainty around the mean of the distribution assuming a normal -prior.} +\item{params_sd}{Numeric. Standard deviations of the parameters of the +distribution as defined in [natural_params().} -\item{sd_sd}{Numeric, defaults to 0. Sets the standard deviation of the -uncertainty around the sd of the distribution assuming a normal prior.} +\item{mean}{Deprecated; use \code{params_mean} instead.} -\item{distribution}{Character, defaults to "lognormal". The (discretised -distribution to be used. If sd == 0 then the distribution is fixed and a -delta function is used. If sd > 0 then the distribution is discretised and -truncated. +\item{sd}{Deprecated; use \code{params_mean} instead.} -The following distributions are currently supported: -\itemize{ -\item "lognormal" - a lognormal distribution. For this distribution \code{mean} -is the mean of the natural logarithm of the delay (on the log scale) and -\code{sd} is the standard deviation of the natural logarithm of the delay. -\item "gamma" - a gamma distribution. For this distribution \code{mean} is the -mean of the delay and \code{sd} is the standard deviation of the delay. During -model fitting these are then transformed to the shape and scale of the gamma -distribution. -} +\item{mean_sd}{Deprecated; use \code{params_sd} instead.} -When \code{distribution} is the default lognormal distribution the other function -arguments have the following definition: -\itemize{ -\item \code{mean} is the mean of the natural logarithm of the delay (on the -log scale). -\item \code{sd} is the standard deviation of the natural logarithm of the delay. -}} +\item{sd_sd}{Deprecated; use \code{params_sd} instead.} \item{max}{Numeric, maximum value of the distribution. The distribution will -be truncated at this value.} +be truncated at this value. Default: \code{Inf}, i.e. no maximum.} \item{pmf}{Numeric, a vector of values that represent the (nonparametric) probability mass function of the delay (starting with 0); defaults to an empty vector corresponding to a parametric specification of the distribution -(using \code{mean}, \code{sd} and corresponding uncertainties)} +(using \code{params_mean}, and \code{params_sd}.} -\item{fixed}{Deprecated, use \code{\link[=fix_dist]{fix_dist()}} instead -as coming from fixed (vs uncertain) distributions. Overrides any values -assigned to \code{mean_sd} and \code{sd_sd} by setting them to zero. -reduces compute requirement but may produce spuriously precise estimates.} +\item{fixed}{Deprecated, use \code{\link[=fix_dist]{fix_dist()}} instead.} } \value{ A list of distribution options. } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -Defines the parameters of a supported distribution for use in onward -modelling. Multiple distribution families are supported - see the -documentation for \code{family} for details. Alternatively, a nonparametric -distribution can be specified using the \code{pmf} argument. -This function provides distribution -functionality in \code{\link[=delay_opts]{delay_opts()}}, \code{\link[=generation_time_opts]{generation_time_opts()}}, and -\code{\link[=trunc_opts]{trunc_opts()}}. -} -\examples{ -# A fixed lognormal distribution with mean 5 and sd 1. -dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") - -# An uncertain gamma distribution with mean 3 and sd 2 -dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, - distribution = "gamma" -) +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +This function is deprecated as a user-facing function (while its +functionality is still used internally). Construct distributions using +the corresponding distribution function such as \code{\link[=Gamma]{Gamma()}}, \code{\link[=LogNormal]{LogNormal()}}, +\code{\link[=Normal]{Normal()}} or \code{\link[=Fixed]{Fixed()}} instead. } diff --git a/man/dist_spec_plus.Rd b/man/dist_spec_plus.Rd deleted file mode 100644 index 0886a791c..000000000 --- a/man/dist_spec_plus.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R -\name{dist_spec_plus} -\alias{dist_spec_plus} -\title{Creates a delay distribution as the sum of two other delay distributions} -\usage{ -dist_spec_plus(e1, e2, tolerance = 0.001) -} -\arguments{ -\item{e1}{The first delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to -combine.} - -\item{e2}{The second delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to -combine.} - -\item{tolerance}{A numeric value that sets the cumulative probability -to retain when truncating the cumulative distribution function of the -combined nonparametric delays. The default value is 0.001 with this retaining -0.999 of the cumulative probability. Note that using a larger tolerance may -result in a smaller number of points in the combined nonparametric delay but -may also impact the accuracy of the combined delay (i.e., change the mean -and standard deviation).} -} -\value{ -A delay distribution representing the sum of the two delays -(with class \code{\link[=dist_spec]{dist_spec()}}) -} -\description{ -This is done via convolution with \code{\link[stats:convolve]{stats::convolve()}}. Nonparametric delays -that can be combined are processed together, and their cumulative -distribution function is truncated at a specified tolerance level, ensuring -numeric stability. -} diff --git a/man/epinow.Rd b/man/epinow.Rd index f4cd6b8e7..9fbb7f142 100644 --- a/man/epinow.Rd +++ b/man/epinow.Rd @@ -129,30 +129,21 @@ options(mc.cores = ifelse(interactive(), 4, 1)) # set an example generation time. In practice this should use an estimate # from the literature or be estimated from data -generation_time <- dist_spec( - mean = 3.6, - mean_sd = 0.7, - sd = 3.1, - sd_sd = 0.8, +generation_time <- Gamma( + shape = Normal(1.3, 0.3), + rate = Normal(0.37, 0.09), max = 14 ) # set an example incubation period. In practice this should use an estimate # from the literature or be estimated from data -incubation_period <- dist_spec( - mean = 1.6, - mean_sd = 0.06, - sd = 0.4, - sd_sd = 0.07, +incubation_period <- LogNormal( + meanlog = Normal(1.6, 0.06), + sdlog = Normal(0.4, 0.07), max = 14 ) # set an example reporting delay. In practice this should use an estimate # from the literature or be estimated from data -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - sd = convert_to_logsd(2, 1), - max = 10, - dist = "lognormal" -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) # example case data reported_cases <- example_confirmed[1:40] diff --git a/man/estimate_delay.Rd b/man/estimate_delay.Rd index 2e29d2ddc..d0141db25 100644 --- a/man/estimate_delay.Rd +++ b/man/estimate_delay.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/estimate_delay.R \name{estimate_delay} \alias{estimate_delay} \title{Estimate a Delay Distribution} diff --git a/man/estimate_infections.Rd b/man/estimate_infections.Rd index 33a24f2de..731b8772f 100644 --- a/man/estimate_infections.Rd +++ b/man/estimate_infections.Rd @@ -118,31 +118,21 @@ reported_cases <- example_confirmed[1:60] # set an example generation time. In practice this should use an estimate # from the literature or be estimated from data -generation_time <- dist_spec( - mean = 3.6, - mean_sd = 0.7, - sd = 3.1, - sd_sd = 0.8, +generation_time <- Gamma( + shape = Normal(1.3, 0.3), + rate = Normal(0.37, 0.09), max = 14 ) # set an example incubation period. In practice this should use an estimate # from the literature or be estimated from data -incubation_period <- dist_spec( - mean = 1.6, - mean_sd = 0.06, - sd = 0.4, - sd_sd = 0.07, +incubation_period <- LogNormal( + meanlog = Normal(1.6, 0.06), + sdlog = Normal(0.4, 0.07), max = 14 ) # set an example reporting delay. In practice this should use an estimate # from the literature or be estimated from data -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - sd = convert_to_logsd(2, 1), - max = 10, - dist = "lognormal" -) - +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) # for more examples, see the "estimate_infections examples" vignette def <- estimate_infections(reported_cases, diff --git a/man/estimate_secondary.Rd b/man/estimate_secondary.Rd index 6f0b121a4..ac10191eb 100644 --- a/man/estimate_secondary.Rd +++ b/man/estimate_secondary.Rd @@ -7,8 +7,8 @@ estimate_secondary( reports, secondary = secondary_opts(), - delays = delay_opts(dist_spec(mean = 2.5, mean_sd = 0.5, sd = 0.47, sd_sd = 0.25, max = - 30)), + delays = delay_opts(LogNormal(meanlog = Normal(2.5, 0.5), sdlog = Normal(0.47, 0.25), + max = 30)), truncation = trunc_opts(), obs = obs_opts(), stan = stan_opts(), diff --git a/man/estimate_truncation.Rd b/man/estimate_truncation.Rd index cd4daf547..a46ca2fc5 100644 --- a/man/estimate_truncation.Rd +++ b/man/estimate_truncation.Rd @@ -9,7 +9,8 @@ estimate_truncation( max_truncation, trunc_max = 10, trunc_dist = "lognormal", - truncation = trunc_opts(dist_spec(mean = 0, mean_sd = 1, sd = 0, sd_sd = 1, max = 10)), + truncation = trunc_opts(LogNormal(meanlog = Normal(0, 1), sdlog = Normal(1, 1), max = + 10)), model = NULL, stan = stan_opts(), CrIs = c(0.2, 0.5, 0.9), diff --git a/man/extract_params.Rd b/man/extract_params.Rd new file mode 100644 index 000000000..34387b5f2 --- /dev/null +++ b/man/extract_params.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{extract_params} +\alias{extract_params} +\title{Internal function for extracting given parameter names of a distribution +from the environment. Called by \code{new_dist_spec}} +\usage{ +extract_params(params, distribution) +} +\arguments{ +\item{params}{Given parameters (obtained using \code{as.list(environment())})} + +\item{distribution}{Character; the distribution to use.} +} +\value{ +A character vector of parameters and their values. +} +\description{ +Internal function for extracting given parameter names of a distribution +from the environment. Called by \code{new_dist_spec} +} +\keyword{internal} diff --git a/man/extract_single_dist.Rd b/man/extract_single_dist.Rd new file mode 100644 index 000000000..9e79c5361 --- /dev/null +++ b/man/extract_single_dist.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{extract_single_dist} +\alias{extract_single_dist} +\title{Extract a single element of a composite \verb{}} +\usage{ +extract_single_dist(x, i) +} +\arguments{ +\item{x}{A composite \code{dist_spec} object} + +\item{i}{The index to extract} +} +\value{ +A single \code{dist_spec} object +} +\description{ +Extract a single element of a composite \verb{} +} +\examples{ +dist1 <- LogNormal(mean = 1.6, sd = 0.5, max = 20) + +# An uncertain gamma distribution with mean 3 and sd 2 +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 +) + +# Multiple distributions +\dontrun{ + dist <- dist1 + dist2 + extract_single_dist(dist, 2) +} +} +\keyword{internal} diff --git a/man/fix_dist.Rd b/man/fix_dist.Rd index 52846c51c..4a78dd6a9 100644 --- a/man/fix_dist.Rd +++ b/man/fix_dist.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{fix_dist} \alias{fix_dist} -\title{Fix the parameters of a \verb{} object} +\title{Fix the parameters of a \verb{}} \usage{ fix_dist(x, strategy = c("mean", "sample")) } \arguments{ -\item{x}{A \verb{} object} +\item{x}{A \verb{}} \item{strategy}{Character; either "mean" (use the mean estimates of the mean and standard deviation) or "sample" (randomly sample mean and @@ -20,3 +20,11 @@ A \verb{} object without uncertainty If the given \verb{} has any uncertainty, it is removed and the corresponding distribution converted into a fixed one. } +\examples{ +# An uncertain gamma distribution with mean 3 and sd 2 +dist <- LogNormal( + meanlog = Normal(3, 0.5), sdlog = Normal(2, 0.5), max = 20 +) + +fix_dist(dist) +} diff --git a/man/gamma_dist_def.Rd b/man/gamma_dist_def.Rd index 7ef135149..39680f9d3 100644 --- a/man/gamma_dist_def.Rd +++ b/man/gamma_dist_def.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/deprecated.R \name{gamma_dist_def} \alias{gamma_dist_def} \title{Generate a Gamma Distribution Definition Based on Parameter Estimates} @@ -43,10 +43,8 @@ Samples outside of this range are resampled.} A \verb{} defining the distribution as used by \code{\link[=dist_skel]{dist_skel()}} } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#soft-deprecated}{\figure{lifecycle-soft-deprecated.svg}{options: alt='[Soft-deprecated]'}}}{\strong{[Soft-deprecated]}} -Generates a distribution definition when only parameter estimates -are available for gamma distributed parameters. See \code{\link[=rgamma]{rgamma()}} for -distribution information. +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Deprecated; use \code{\link[=Gamma]{Gamma()}} instead to define a gamma distribution. } \examples{ # using estimated shape and scale diff --git a/man/generation_time_opts.Rd b/man/generation_time_opts.Rd index 7f3bdf647..95f8b2e3f 100644 --- a/man/generation_time_opts.Rd +++ b/man/generation_time_opts.Rd @@ -5,19 +5,19 @@ \title{Generation Time Distribution Options} \usage{ generation_time_opts( - dist = dist_spec(mean = 1), + dist = Fixed(1), ..., disease, source, max = 14, fixed = FALSE, - prior_weight + prior_weight, + tolerance = 0.001 ) } \arguments{ -\item{dist}{A delay distribution or series of delay distributions generated -using \code{\link[=dist_spec]{dist_spec()}}. If no distribution is given a fixed generation time of -1 will be assumed.} +\item{dist}{A delay distribution or series of delay distributions . If no +distribution is given a fixed generation time of 1 will be assumed.} \item{...}{deprecated; use \code{dist} instead} @@ -32,6 +32,8 @@ using \code{\link[=dist_spec]{dist_spec()}}. If no distribution is given a fixed \item{prior_weight}{deprecated; prior weights are now specified as a model option. Use the \code{weigh_delay_priors} argument of \code{\link[=estimate_infections]{estimate_infections()}} instead.} + +\item{tolerance}{Numeric; the desired tolerance level.} } \value{ A \verb{} object summarising the input delay @@ -46,11 +48,15 @@ Returns generation time parameters in a format for lower level model use. generation_time_opts() # A fixed gamma distributed generation time -generation_time_opts(dist_spec(mean = 3, sd = 2, max = 14)) +generation_time_opts(Gamma(mean = 3, sd = 2, max = 14)) # An uncertain gamma distributed generation time generation_time_opts( - dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 14) + Gamma( + mean = Normal(mean = 3, sd = 1), + sd = Normal(mean = 2, sd = 0.5), + max = 14 + ) ) # An example generation time @@ -58,5 +64,5 @@ generation_time_opts(example_generation_time) } \seealso{ \code{\link[=convert_to_logmean]{convert_to_logmean()}} \code{\link[=convert_to_logsd]{convert_to_logsd()}} -\code{\link[=bootstrapped_dist_fit]{bootstrapped_dist_fit()}} \code{\link[=dist_spec]{dist_spec()}} +\code{\link[=bootstrapped_dist_fit]{bootstrapped_dist_fit()}} \code{\link[=Gamma]{Gamma()}} \code{\link[=LogNormal]{LogNormal()}} \code{\link[=Fixed]{Fixed()}} } diff --git a/man/get_dist.Rd b/man/get_dist.Rd index 029ec46ea..46ce221e1 100644 --- a/man/get_dist.Rd +++ b/man/get_dist.Rd @@ -25,7 +25,7 @@ A list defining a distribution \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This function has been deprecated. Please specify a distribution -using \code{\link[=dist_spec]{dist_spec()}} instead. +using functions such as \code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}} instead. } \seealso{ \code{\link[=dist_spec]{dist_spec()}} diff --git a/man/get_generation_time.Rd b/man/get_generation_time.Rd index b1fa3293f..7b9e4f84a 100644 --- a/man/get_generation_time.Rd +++ b/man/get_generation_time.Rd @@ -24,7 +24,7 @@ A list defining a distribution Extracts a literature distribution from \code{generation_times}. This function has been deprecated. Please specify a distribution -using \code{\link[=dist_spec]{dist_spec()}} instead. +using functions such as \code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}} instead. } \seealso{ \code{\link[=dist_spec]{dist_spec()}} diff --git a/man/get_incubation_period.Rd b/man/get_incubation_period.Rd index b514a02c1..98f97f023 100644 --- a/man/get_incubation_period.Rd +++ b/man/get_incubation_period.Rd @@ -24,7 +24,7 @@ A list defining a distribution Extracts a literature distribution from \code{generation_times}. This function has been deprecated. Please specify a distribution -using \code{\link[=dist_spec]{dist_spec()}} instead +using functions such as \code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}} instead. } \seealso{ \code{\link[=dist_spec]{dist_spec()}} diff --git a/man/get_seeding_time.Rd b/man/get_seeding_time.Rd index 4a0058995..862ebfc80 100644 --- a/man/get_seeding_time.Rd +++ b/man/get_seeding_time.Rd @@ -7,9 +7,11 @@ get_seeding_time(delays, generation_time) } \arguments{ -\item{delays}{Delays as specified using \code{\link[=dist_spec]{dist_spec()}}} +\item{delays}{Delays specified using distribution functions such as +\code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}}} -\item{generation_time}{Generation time as specified using \code{\link[=dist_spec]{dist_spec()}}} +\item{generation_time}{Generation specified using distribution functions +such as \code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}}} } \value{ An integer seeding time diff --git a/man/lognorm_dist_def.Rd b/man/lognorm_dist_def.Rd index 766b40834..b36209cac 100644 --- a/man/lognorm_dist_def.Rd +++ b/man/lognorm_dist_def.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/deprecated.R \name{lognorm_dist_def} \alias{lognorm_dist_def} \title{Generate a Log Normal Distribution Definition Based on Parameter Estimates} @@ -26,7 +26,7 @@ Samples outside of this range are resampled.} A \verb{} defining the distribution as used by \code{\link[=dist_skel]{dist_skel()}} } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#soft-deprecated}{\figure{lifecycle-soft-deprecated.svg}{options: alt='[Soft-deprecated]'}}}{\strong{[Soft-deprecated]}} +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Generates a distribution definition when only parameter estimates are available for log normal distributed parameters. See \code{\link[=rlnorm]{rlnorm()}} for distribution information. diff --git a/man/lower_bounds.Rd b/man/lower_bounds.Rd new file mode 100644 index 000000000..3b54b3e6b --- /dev/null +++ b/man/lower_bounds.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{lower_bounds} +\alias{lower_bounds} +\title{Get the lower bounds of the parameters of a distribution} +\usage{ +lower_bounds(distribution) +} +\arguments{ +\item{distribution}{Character; the distribution to use.} +} +\value{ +A numeric vector, the lower bounds. +} +\description{ +This is used to avoid sampling parameter values that have no support. +} +\examples{ +\dontrun{ +lower_bounds("lognormal") +} +} +\keyword{internal} diff --git a/man/max.dist_spec.Rd b/man/max.dist_spec.Rd new file mode 100644 index 000000000..9db3a2d40 --- /dev/null +++ b/man/max.dist_spec.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{max.dist_spec} +\alias{max.dist_spec} +\title{Returns the maximum of one or more delay distribution} +\usage{ +\method{max}{dist_spec}(x, ...) +} +\arguments{ +\item{x}{The \code{\link[=dist_spec]{dist_spec()}} to use} + +\item{...}{Not used} +} +\value{ +A vector of means. +} +\description{ +This works out the maximum of all the (parametric / nonparametric) delay +distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}} (ignoring any uncertainty +in parameters) +} +\examples{ +# A fixed gamma distribution with mean 5 and sd 1. +dist1 <- Gamma(mean = 5, sd = 1, max = 20) +max(dist1) + +# An uncertain lognormal distribution with mean 3 and sd 2 +dist2 <- LogNormal(mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20) +max(dist2) + +# The max the sum of two distributions +max(dist1 + dist2) +} diff --git a/man/mean.dist_spec.Rd b/man/mean.dist_spec.Rd index 0cf374ce8..b508ce7c8 100644 --- a/man/mean.dist_spec.Rd +++ b/man/mean.dist_spec.Rd @@ -1,37 +1,36 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{mean.dist_spec} \alias{mean.dist_spec} \title{Returns the mean of one or more delay distribution} \usage{ -\method{mean}{dist_spec}(x, ...) +\method{mean}{dist_spec}(x, ..., ignore_uncertainty = FALSE) } \arguments{ \item{x}{The \verb{} to use} \item{...}{Not used} -} -\value{ -A vector of means. + +\item{ignore_uncertainty}{Logical; whether to ignore any uncertainty in +parameters. If set to FALSE (the default) then the mean of any uncertain +parameters will be returned as NA.} } \description{ This works out the mean of all the (parametric / nonparametric) delay -distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}}. +distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}} (ignoring any uncertainty +in parameters) } \examples{ # A fixed lognormal distribution with mean 5 and sd 1. -lognormal <- dist_spec( - mean = 5, sd = 1, max = 20, distribution = "lognormal" -) -mean(lognormal) +dist1 <- LogNormal(mean = 5, sd = 1, max = 20) +mean(dist1) # An uncertain gamma distribution with mean 3 and sd 2 -gamma <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, - distribution = "gamma" +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 ) -mean(gamma) +mean(dist2) # The mean of the sum of two distributions -mean(lognormal + gamma) +mean(dist1 + dist2) } diff --git a/man/natural_params.Rd b/man/natural_params.Rd new file mode 100644 index 000000000..6e7812b9f --- /dev/null +++ b/man/natural_params.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{natural_params} +\alias{natural_params} +\title{Get the names of the natural parameters of a distribution} +\usage{ +natural_params(distribution) +} +\arguments{ +\item{distribution}{Character; the distribution to use.} +} +\value{ +A character vector, the natural parameters. +} +\description{ +These are the parameters used in the stan models. All other parameter +representations are converted to these using \code{\link[=convert_to_natural]{convert_to_natural()}} before +being passed to the stan models. +} +\examples{ +\dontrun{ +natural_params("gamma") +} +} +\keyword{internal} diff --git a/man/new_dist_spec.Rd b/man/new_dist_spec.Rd new file mode 100644 index 000000000..b0a3fb553 --- /dev/null +++ b/man/new_dist_spec.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{new_dist_spec} +\alias{new_dist_spec} +\title{Internal function for generating a \code{dist_spec} given parameters and a +distribution.} +\usage{ +new_dist_spec(params, distribution) +} +\arguments{ +\item{params}{Parameters of the distribution (including \code{max})} + +\item{distribution}{Character; the distribution to use.} +} +\value{ +A \code{dist_spec} of the given specification. +} +\description{ +This will convert all parameters to natural parameters before generating +a \code{dist_spec}. If they have uncertainty this will be done using sampling. +} +\examples{ +\dontrun{ +new_dist_spec( + params = list(mean = 2, sd = 1, max = Inf), + distribution = "normal" +) +} +} +\keyword{internal} diff --git a/man/plot.dist_spec.Rd b/man/plot.dist_spec.Rd index cda878a99..c6b80f5fb 100644 --- a/man/plot.dist_spec.Rd +++ b/man/plot.dist_spec.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{plot.dist_spec} \alias{plot.dist_spec} \title{Plot PMF and CDF for a dist_spec object} @@ -18,21 +18,18 @@ Note that currently uncertainty in distributions is not plot. } \examples{ #' # A fixed lognormal distribution with mean 5 and sd 1. -lognormal <- dist_spec( - mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal" -) -plot(lognormal) +dist1 <- LogNormal(mean = 1.6, sd = 0.5, max = 20) +plot(dist1) # An uncertain gamma distribution with mean 3 and sd 2 -gamma <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, - distribution = "gamma" +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 ) -plot(gamma) +plot(dist2) # Multiple distributions -plot(lognormal + gamma + lognormal) +plot(dist1 + dist2 + dist1) # A combination of the two fixed distributions -plot(lognormal + lognormal) +plot(dist1 + dist1) } diff --git a/man/plus-.dist_spec.Rd b/man/plus-.dist_spec.Rd index 962da9708..6a010bab4 100644 --- a/man/plus-.dist_spec.Rd +++ b/man/plus-.dist_spec.Rd @@ -1,42 +1,34 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{+.dist_spec} \alias{+.dist_spec} -\title{Creates a delay distribution as the sum of two other delay distributions} +\title{Creates a delay distribution as the sum of two other delay distributions.} \usage{ \method{+}{dist_spec}(e1, e2) } \arguments{ -\item{e1}{The first delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +\item{e1}{The first delay distribution (of type \code{\link[=dist_spec]{dist_spec()}}) to combine.} -\item{e2}{The second delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +\item{e2}{The second delay distribution (of type \code{\link[=dist_spec]{dist_spec()}}) to combine.} } \value{ A delay distribution representing the sum of the two delays -(with class \code{\link[=dist_spec]{dist_spec()}}) } \description{ -This is done via convolution with \code{\link[stats:convolve]{stats::convolve()}}. Nonparametric delays -that can be combined are processed together, and their cumulative -distribution function is truncated at a specified tolerance level, ensuring -numeric stability. +Creates a delay distribution as the sum of two other delay distributions. } \examples{ # A fixed lognormal distribution with mean 5 and sd 1. -lognormal <- dist_spec( - mean = 1.6, sd = 1, max = 20, distribution = "lognormal" +dist1 <- LogNormal( + meanlog = 1.6, sdlog = 1, max = 20 ) -lognormal + lognormal +dist1 + dist1 # An uncertain gamma distribution with mean 3 and sd 2 -gamma <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, - distribution = "gamma" +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 ) -lognormal + gamma - -# Using tolerance parameter -EpiNow2:::dist_spec_plus(lognormal, lognormal, tolerance = 0.5) +dist1 + dist2 } diff --git a/man/print.dist_spec.Rd b/man/print.dist_spec.Rd index c2e6a294e..337b4781e 100644 --- a/man/print.dist_spec.Rd +++ b/man/print.dist_spec.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/dist_spec.R \name{print.dist_spec} \alias{print.dist_spec} \title{Prints the parameters of one or more delay distributions} @@ -20,15 +20,12 @@ functions of fixed delay distributions combined in the passed \code{\link[=dist_ } \examples{ #' # A fixed lognormal distribution with mean 5 and sd 1. -lognormal <- dist_spec( - mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal" -) -print(lognormal) +dist1 <- LogNormal(mean = 1.5, sd = 0.5, max = 20) +print(dist1) # An uncertain gamma distribution with mean 3 and sd 2 -gamma <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, - distribution = "gamma" +dist2 <- Gamma( + mean = Normal(3, 0.5), sd = Normal(2, 0.5), max = 20 ) -print(gamma) +print(dist2) } diff --git a/man/sample_approx_dist.Rd b/man/sample_approx_dist.Rd index 817fbca38..d21926e1d 100644 --- a/man/sample_approx_dist.Rd +++ b/man/sample_approx_dist.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dist.R +% Please edit documentation in R/adjust.R \name{sample_approx_dist} \alias{sample_approx_dist} \title{Approximate Sampling a Distribution using Counts} diff --git a/man/sd_dist.Rd b/man/sd_dist.Rd new file mode 100644 index 000000000..22f5ca1f1 --- /dev/null +++ b/man/sd_dist.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_spec.R +\name{sd_dist} +\alias{sd_dist} +\title{Returns the standard deviation of one or more delay distribution} +\usage{ +sd_dist(x) +} +\arguments{ +\item{x}{The \code{\link[=dist_spec]{dist_spec()}} to use} +} +\value{ +A vector of standard deviations. +} +\description{ +This works out the standard deviation of all the (parametric / +nonparametric) delay distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}}. +} +\examples{ +\dontrun{ +# A fixed lognormal distribution with sd 5 and sd 1. +dist1 <- LogNormal(mean = 5, sd = 1, max = 20) +sd_dist(dist1) + +# A gamma distribution with mean 3 and sd 2 +dist2 <- Gamma(mean = 3, sd = 2) +sd_dist(dist2) + +# The sd of the sum of two distributions +sd_dist(dist1 + dist2) +} +} +\keyword{internal} diff --git a/man/simulate_infections.Rd b/man/simulate_infections.Rd index a78d859fe..e976390e4 100644 --- a/man/simulate_infections.Rd +++ b/man/simulate_infections.Rd @@ -79,7 +79,7 @@ Uncertain parameters are not allowed. A previous function called \code{\link[=simulate_infections]{simulate_infections()}} that simulates from a given model fit has been renamed \code{\link[=forecast_infections]{forecast_infections()}}. Using \code{\link[=simulate_infections]{simulate_infections()}} with existing estimates is now deprecated. This -option will be removed in version 2.1.0. +option will be removed in version 2.0.0. } \examples{ \donttest{ diff --git a/man/trunc_opts.Rd b/man/trunc_opts.Rd index 871980a91..1f9b0dedc 100644 --- a/man/trunc_opts.Rd +++ b/man/trunc_opts.Rd @@ -4,12 +4,14 @@ \alias{trunc_opts} \title{Truncation Distribution Options} \usage{ -trunc_opts(dist = dist_spec()) +trunc_opts(dist = Fixed(0), tolerance = 0.001) } \arguments{ \item{dist}{A delay distribution or series of delay distributions reflecting the truncation generated using \code{\link[=dist_spec]{dist_spec()}} or \code{\link[=estimate_truncation]{estimate_truncation()}}. -Default is an empty call to \code{\link[=dist_spec]{dist_spec()}}, i.e. no truncation} +Default is fixed distribution with maximum 0, i.e. no truncation} + +\item{tolerance}{Numeric; the desired tolerance level.} } \value{ A \verb{} object summarising the input truncation @@ -26,7 +28,7 @@ estimate these distributions. trunc_opts() # truncation dist -trunc_opts(dist = dist_spec(mean = 3, sd = 2, max = 10)) +trunc_opts(dist = LogNormal(mean = 3, sd = 2, max = 10)) } \seealso{ \code{\link[=convert_to_logmean]{convert_to_logmean()}} \code{\link[=convert_to_logsd]{convert_to_logsd()}} diff --git a/man/update_secondary_args.Rd b/man/update_secondary_args.Rd index fa2b7c2ee..18849cbac 100644 --- a/man/update_secondary_args.Rd +++ b/man/update_secondary_args.Rd @@ -13,9 +13,9 @@ update_secondary_args(data, priors, verbose = TRUE) rather than the defaults supplied from other arguments. This is typically useful if wanting to inform a estimate from the posterior of another model fit. Priors that are currently use to update the defaults are the scaling -fraction ("frac_obs"), the mean delay ("delay_mean"), and standard deviation -of the delay ("delay_sd"). The \verb{} should have the following -variables: \code{variable}, \code{mean}, and \code{sd}.} +fraction ("frac_obs"), and delay parameters ("delay_params"). The +\verb{} should have the following variables: \code{variable}, \code{mean}, and +\code{sd}.} \item{verbose}{Logical, defaults to \code{FALSE}. Should verbose progress messages be returned.} @@ -26,9 +26,9 @@ A list as produced by \code{create_stan_data()}. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} This functions allows the user to more easily specify data driven or model -based priors for \code{\link[=estimate_secondary]{estimate_secondary()}} from example from previous model fits -using a \verb{} to overwrite other default settings. Note that default -settings are still required. +based priors for \code{\link[=estimate_secondary]{estimate_secondary()}} from example from previous model +fits using a \verb{} to overwrite other default settings. Note that +default settings are still required. } \examples{ priors <- data.frame(variable = "frac_obs", mean = 3, sd = 1) diff --git a/tests/testthat/_snaps/simulate-infections.md b/tests/testthat/_snaps/simulate-infections.md index 9f317f1c6..066d5787e 100644 --- a/tests/testthat/_snaps/simulate-infections.md +++ b/tests/testthat/_snaps/simulate-infections.md @@ -36,33 +36,33 @@ variable date value - 1: infections 2023-01-01 240.6049 - 2: infections 2023-01-02 253.8951 - 3: infections 2023-01-03 267.7099 - 4: infections 2023-01-04 282.1928 - 5: infections 2023-01-05 297.4201 - 6: infections 2023-01-06 313.4477 - 7: infections 2023-01-07 330.3258 - 8: infections 2023-01-08 232.0685 - 9: infections 2023-01-09 222.6294 - 10: infections 2023-01-10 212.0540 - 11: infections 2023-01-11 201.3581 - 12: infections 2023-01-12 190.8991 - 13: infections 2023-01-13 180.8132 - 14: infections 2023-01-14 171.1484 - 15: reported_cases 2023-01-01 155.0000 - 16: reported_cases 2023-01-02 563.0000 - 17: reported_cases 2023-01-03 146.0000 - 18: reported_cases 2023-01-04 644.0000 - 19: reported_cases 2023-01-05 282.0000 - 20: reported_cases 2023-01-06 473.0000 - 21: reported_cases 2023-01-07 193.0000 - 22: reported_cases 2023-01-08 262.0000 - 23: reported_cases 2023-01-09 19.0000 - 24: reported_cases 2023-01-10 277.0000 - 25: reported_cases 2023-01-11 177.0000 - 26: reported_cases 2023-01-12 273.0000 - 27: reported_cases 2023-01-13 97.0000 - 28: reported_cases 2023-01-14 265.0000 + 1: infections 2023-01-01 214.1121 + 2: infections 2023-01-02 225.6793 + 3: infections 2023-01-03 237.8341 + 4: infections 2023-01-04 250.6285 + 5: infections 2023-01-05 264.1041 + 6: infections 2023-01-06 278.3004 + 7: infections 2023-01-07 293.2573 + 8: infections 2023-01-08 206.0109 + 9: infections 2023-01-09 197.6193 + 10: infections 2023-01-10 188.2225 + 11: infections 2023-01-11 178.7223 + 12: infections 2023-01-12 169.4362 + 13: infections 2023-01-13 160.4852 + 14: infections 2023-01-14 151.9122 + 15: reported_cases 2023-01-01 157.0000 + 16: reported_cases 2023-01-02 146.0000 + 17: reported_cases 2023-01-03 164.0000 + 18: reported_cases 2023-01-04 136.0000 + 19: reported_cases 2023-01-05 601.0000 + 20: reported_cases 2023-01-06 263.0000 + 21: reported_cases 2023-01-07 443.0000 + 22: reported_cases 2023-01-08 173.0000 + 23: reported_cases 2023-01-09 201.0000 + 24: reported_cases 2023-01-10 15.0000 + 25: reported_cases 2023-01-11 229.0000 + 26: reported_cases 2023-01-12 151.0000 + 27: reported_cases 2023-01-13 232.0000 + 28: reported_cases 2023-01-14 82.0000 variable date value diff --git a/tests/testthat/_snaps/simulate-secondary.md b/tests/testthat/_snaps/simulate-secondary.md index 2ebde8711..04845a6fb 100644 --- a/tests/testthat/_snaps/simulate-secondary.md +++ b/tests/testthat/_snaps/simulate-secondary.md @@ -20,13 +20,13 @@ 1: 2020-02-22 2 2: 2020-02-23 18 - 3: 2020-02-24 86 - 4: 2020-02-25 32 - 5: 2020-02-26 188 + 3: 2020-02-24 83 + 4: 2020-02-25 31 + 5: 2020-02-26 185 --- - 126: 2020-06-26 407 - 127: 2020-06-27 154 - 128: 2020-06-28 215 - 129: 2020-06-29 226 - 130: 2020-06-30 211 + 126: 2020-06-26 163 + 127: 2020-06-27 375 + 128: 2020-06-28 124 + 129: 2020-06-29 172 + 130: 2020-06-30 194 diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index ce0ed932d..4b63e43af 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -18,6 +18,4 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { withr::defer(future::plan("sequential"), teardown_env()) ## process warning once as previous behaviour has been deprecated -empty <- suppressWarnings(dist_spec()) - - +dummy <- suppressWarnings(dist_spec(mean = 0, sd = 1, max = 5)) diff --git a/tests/testthat/test-adjust_infection_to_report.R b/tests/testthat/test-adjust_infection_to_report.R index 6f73aa227..33e8502f4 100644 --- a/tests/testthat/test-adjust_infection_to_report.R +++ b/tests/testthat/test-adjust_infection_to_report.R @@ -3,24 +3,14 @@ cases <- data.table::copy(example_confirmed)[, cases := as.integer(confirm)] # define a single report delay distribution -delay_def <- lognorm_dist_def( - mean = 5, mean_sd = 1, sd = 3, sd_sd = 1, - max_value = 30, samples = 1, to_log = TRUE -) - -# define a single incubation period -incubation_def <- lognorm_dist_def( - mean = incubation_periods[1, ]$mean, - mean_sd = incubation_periods[1, ]$mean_sd, - sd = incubation_periods[1, ]$sd, - sd_sd = incubation_periods[1, ]$sd_sd, - max_value = 30, samples = 1 +delay <- LogNormal( + meanlog = Normal(1.4, 0.3), sdlog = Normal(0.6, 0.2), max = 30 ) test_that("adjust_infection_to_report can correctly handle a simple mapping", { reports <- adjust_infection_to_report( cases, - delay_defs = list(incubation_def, delay_def) + delay_defs = example_incubation_period + delay ) expect_true(nrow(reports) > 80) expect_true(all(!is.infinite(reports$cases))) @@ -31,10 +21,21 @@ test_that("adjust_infection_to_report can correctly handle a mapping with a day of the week effect", { reports <- adjust_infection_to_report( cases, - delay_defs = list(incubation_def, delay_def), + delay_defs = example_incubation_period + delay, reporting_effect = c(1.1, rep(1, 4), 0.95, 0.95) ) expect_true(nrow(reports) > 80) expect_true(all(!is.infinite(reports$cases))) expect_true(all(!is.na(reports$cases))) }) + +test_that("passing data tables to adjust_infection_to_report is deprecated", { + suppressWarnings(delay_def <- lognorm_dist_def( + mean = 5, mean_sd = 1, sd = 3, sd_sd = 1, + max_value = 30, samples = 1, to_log = TRUE + )) + expect_deprecated(adjust_infection_to_report( + cases, + delay_defs = list(delay_def) + )) +}) diff --git a/tests/testthat/test-delays.R b/tests/testthat/test-delays.R index e7daddf7e..dc70059ce 100644 --- a/tests/testthat/test-delays.R +++ b/tests/testthat/test-delays.R @@ -12,107 +12,67 @@ test_stan_delays <- function(generation_time = generation_time_opts(), } delay_params <- - c("delay_mean_mean", "delay_mean_sd", "delay_sd_mean", "delay_sd_sd", "delay_max", - "delay_np_pmf") + c("delay_params_mean", "delay_params_sd", "delay_max", "delay_np_pmf") test_that("generation times can be specified in different ways", { expect_equal( test_stan_delays(params = delay_params), - c(0, 1) + c(0, 1, 1, 1) ) expect_equal( test_stan_delays( - generation_time = generation_time_opts(dist_spec(mean = 3)), + generation_time = generation_time_opts(Fixed(value = 3)), params = delay_params ), - c(0, 0, 0, 1) + c(0, 0, 0, 1, 1, 1) ) expect_equal( round(test_stan_delays( - generation_time = generation_time_opts(dist_spec(mean = 3, sd = 1, max = 4)), + generation_time = generation_time_opts( + LogNormal(meanlog = 3, sdlog = 1, max = 4) + ), params = delay_params ), digits = 2), - c(0.02, 0.11, 0.22, 0.30, 0.35) + c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00, 1.00) ) }) test_that("delay parameters can be specified in different ways", { expect_equal( tail(test_stan_delays( - delays = delay_opts(dist_spec(mean = 3)), + delays = delay_opts(Fixed(value = 3)), params = delay_params ), n = -2), - c(0, 0, 0, 1) + c(0, 0, 0, 1, 1) ) expect_equal( tail(round(test_stan_delays( - delays = delay_opts(dist_spec(mean = 3, sd = 1, max = 4)), + delays = delay_opts( + LogNormal(meanlog = 3, sdlog = 1, max = 4) + ), params = delay_params ), digits = 2), n = -2), - c(0.02, 0.11, 0.22, 0.30, 0.35) + c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00) ) }) test_that("truncation parameters can be specified in different ways", { expect_equal( tail(round(test_stan_delays( - truncation = trunc_opts(dist = dist_spec(mean = 3, sd = 1, max = 4)), + truncation = trunc_opts( + dist = LogNormal(meanlog = 3, sdlog = 1, max = 4) + ), params = delay_params ), digits = 2), n = -2), - c(0.02, 0.11, 0.22, 0.30, 0.35) + c(1.00, 0.02, 0.11, 0.22, 0.30, 0.35) ) }) -test_that("contradictory generation times are caught", { - expect_error(generation_time_opts(dist_spec(mean = 3.5)), "must be an integer") - expect_error( - generation_time_opts(dist_spec(mean = 3, mean_sd = 1)), - "must be 0" - ) -}) - -test_that("contradictory delays are caught", { - expect_error( - test_stan_delays(delays = delay_opts(dist_spec(mean = 3.5))), - "must be an integer" - ) - expect_error( - test_stan_delays(delays = delay_opts(dist_spec(mean = 3, mean_sd = 1))), - "must be 0" - ) -}) - -test_that("deprecated arguments are caught", { - options(lifecycle_verbosity = "warning") - expect_warning( - test_stan_delays( - generation_time = generation_time_opts(mean = 3), - params = delay_params - ), "deprecated" - ) - expect_error( - test_stan_delays( - delays = delay_opts(mean = 3), - params = delay_params - ), "named arguments" - ) - expect_warning( - test_stan_delays( - delays = delay_opts(list(mean = 3)), - params = delay_params - ), "deprecated" - ) - expect_warning( - test_stan_delays( - delays = delay_opts(list(mean = 3)), - params = delay_params - ), "deprecated" - ) - expect_warning( - test_stan_delays( - delays = trunc_opts(list(mean = 3)), - params = delay_params - ), "deprecated" - ) - options(lifecycle_verbosity = NULL) +test_that("distributions incompatible with stan models are caught", { + expect_error(generation_time_opts( + Gamma(2, 2) + ), "maximum") + expect_error(delay_opts( + Normal(2, 2, max = 10) + ), "lognormal") }) diff --git a/tests/testthat/test-dist.R b/tests/testthat/test-dist.R index 0902f7183..c54f1ec22 100644 --- a/tests/testthat/test-dist.R +++ b/tests/testthat/test-dist.R @@ -2,27 +2,30 @@ skip_on_cran() skip_on_os("windows") test_that("distributions are the same in R and stan", { - args <- list(mean = 3, mean_sd = 0, sd = 2, sd_sd = 0, max_value = 15) - lognormal_params <- - do.call(lognorm_dist_def, (c(args, list(samples = 1))))$params[[1]] - gamma_params <- - do.call(gamma_dist_def, (c(args, list(samples = 1))))$params[[1]] - pmf_r_lognormal <- dist_skel( - n = seq_len(args$max_value + 1) - 1, dist = TRUE, cum = FALSE, - model = "lognormal", params = lognormal_params, max_value = args$max_value, - discrete = TRUE - ) - pmf_r_gamma <- dist_skel( - n = seq_len(args$max_value + 1) - 1, dist = TRUE, cum = FALSE, - model = "gamma", params = gamma_params, max_value = args$max_value, - discrete = TRUE - ) + args <- list(mean = 3, sd = 2, max = 15) - pmf_stan_lognormal <- discretised_pmf( - args$mean, args$sd, args$max_value + 1, 0 - ) - pmf_stan_gamma <- discretised_pmf(args$mean, args$sd, args$max_value + 1, 1) + lognormal_dist <- do.call(LogNormal, args) + gamma_dist <- do.call(Gamma, args) + + lognormal_params <- unname(as.numeric(lognormal_dist[[1]]$parameters)) + gamma_params <- unname(as.numeric(gamma_dist[[1]]$parameters)) + + pmf_r_lognormal <- discretise(lognormal_dist)[[1]]$pmf + pmf_r_gamma <- discretise(gamma_dist)[[1]]$pmf + + pmf_stan_lognormal <- discretised_pmf(lognormal_params, args$max + 1, 0) + pmf_stan_gamma <- discretised_pmf(gamma_params, args$max + 1, 1) expect_equal(pmf_r_lognormal, pmf_stan_lognormal) expect_equal(pmf_r_gamma, pmf_stan_gamma) }) + +test_that("deprecated functions are deprecated", { + args <- list(mean = 3, mean_sd = 0, sd = 2, sd_sd = 0, max_value = 15) + expect_deprecated( + do.call(lognorm_dist_def, (c(args, list(samples = 1))))$params[[1]] + ) + expect_deprecated( + do.call(gamma_dist_def, (c(args, list(samples = 1))))$params[[1]] + ) +}) diff --git a/tests/testthat/test-dist_spec.R b/tests/testthat/test-dist_spec.R index 3618f2370..73547cc03 100644 --- a/tests/testthat/test-dist_spec.R +++ b/tests/testthat/test-dist_spec.R @@ -1,13 +1,11 @@ test_that("dist_spec returns correct output for fixed lognormal distribution", { - result <- dist_spec(mean = 5, sd = 1, max = 19, distribution = "lognormal") - expect_equal(dim(result$mean_mean), 0) - expect_equal(dim(result$sd_mean), 0) - expect_equal(dim(result$dist), 0) - expect_equal(dim(result$max), 0) - expect_equal(result$fixed, array(1)) + result <- discretise(LogNormal(meanlog = 5, sdlog = 1, max = 19)) + expect_null(result[[1]]$parameters) + expect_equal(result[[1]]$distribution, "nonparametric") + expect_null(result[[1]]$max) expect_equal( - as.vector(round(result$np_pmf, 2)), + as.vector(round(result[[1]]$pmf, 2)), c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03, 0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09, 0.10, 0.10, 0.11, 0.12) @@ -15,218 +13,266 @@ test_that("dist_spec returns correct output for fixed lognormal distribution", { }) test_that("dist_spec returns correct output for uncertain gamma distribution", { - result <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 19, - distribution = "gamma" + result <- discretise( + Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19) ) - expect_equal(result$mean_mean, array(3L)) - expect_equal(result$sd_mean, array(2)) - expect_equal(result$mean_sd, array(0.5)) - expect_equal(result$sd_sd, array(0.5)) - expect_equal(result$dist, array("gamma")) - expect_equal(result$max, array(19)) - expect_equal(result$fixed, array(0L)) + expect_equal(result[[1]]$parameters$shape[[1]]$parameters$mean, 3) + expect_equal(result[[1]]$parameters$shape[[1]]$parameters$sd, 0.5) + expect_equal(result[[1]]$parameters$rate[[1]]$parameters$mean, 2) + expect_equal(result[[1]]$parameters$rate[[1]]$parameters$sd, 0.5) + expect_equal(result[[1]]$distribution, "gamma") + expect_equal(result[[1]]$max, 19) +}) + +test_that("dist_spec returns correct output for gamma distribution parameterised with scale", { + result <- Gamma(shape = 3, scale = 2) + expect_equal(result[[1]]$parameters$shape, 3) + expect_equal(result[[1]]$parameters$rate, 0.5) + expect_equal(result[[1]]$distribution, "gamma") + expect_true(is.infinite(result[[1]]$max)) }) test_that("dist_spec returns correct output for fixed distribution", { - result <- fix_dist(dist_spec( - mean = 5, mean_sd = 3, sd = 1, max = 19, distribution = "lognormal", - )) - expect_equal(dim(result$mean_mean), 0) - expect_equal(dim(result$sd_mean), 0) - expect_equal(result$fixed, array(1L)) + result <- discretise( + fix_dist(LogNormal(meanlog = Normal(5, 3), sdlog = 1, max = 19)) + ) + expect_null(result[[1]]$parameters) + expect_equal(result[[1]]$distribution, "nonparametric") + expect_null(result[[1]]$max) expect_equal( - as.vector(round(result$np_pmf, 2)), + as.vector(round(result[[1]]$pmf, 2)), c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03, 0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09, 0.10, 0.10, 0.11, 0.12) ) }) -test_that("dist_spec returns error when both pmf and distributional parameters are specified", { - expect_error(dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal", pmf = c(0.1, 0.2, 0.3, 0.4)), - "Distributional parameters or a pmf can be specified, but not both.") -}) - -test_that("dist_spec returns error when mean is missing but other distributional parameters are given", { - expect_error(dist_spec(sd = 1, max = 20, distribution = "lognormal"), - "If any distributional parameters are given then so must the mean.") -}) - -test_that("dist_spec returns error when maximum of parametric distributions is not specified", { - expect_error(dist_spec(mean = 5, sd = 1, distribution = "lognormal"), - "Maximum of parametric distributions must be specified.") -}) - -test_that("+.dist_spec returns correct output for sum of two distributions", { - lognormal <- dist_spec(mean = 5, sd = 1, max = 19, distribution = "lognormal") - gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, distribution = "gamma") - result <- lognormal + gamma - expect_equal(result$mean_mean, array(3)) - expect_equal(result$sd_mean, array(2)) - expect_equal(result$mean_sd, array(0.5)) - expect_equal(result$sd_sd, array(0.5)) - expect_equal(result$n, 2) - expect_equal(result$n_p, 1) - expect_equal(result$n_np, 1) - expect_equal(result$np_pmf_length, 20) -}) - -test_that("+.dist_spec returns correct output for sum of two fixed distributions", { - lognormal <- fix_dist(dist_spec( - mean = 5, sd = 1, max = 19, distribution = "lognormal" - )) - gamma <- fix_dist(dist_spec( - mean = 3, sd = 2, max = 19, distribution = "gamma" - )) - result <- lognormal + gamma - expect_equal(dim(result$mean_mean), 0) - expect_equal(dim(result$sd_mean), 0) - expect_equal(result$n, 1) - expect_equal(result$n_p, 0) - expect_equal(result$n_np, 1) - expect_equal(result$np_pmf_length, 30) -}) - -test_that("+.dist_spec returns correct output for sum of two nonparametric distributions", { - lognormal <- dist_spec(pmf = c(0.1, 0.2, 0.3, 0.4)) - gamma <- dist_spec(pmf = c(0.1, 0.2, 0.3, 0.4)) - result <- lognormal + gamma - expect_equal(dim(result$mean_mean), 0) - expect_equal(dim(result$sd_mean), 0) - expect_equal(result$n, 1) - expect_equal(result$n_p, 0) - expect_equal(result$n_np, 1) - expect_equal(result$np_pmf_length, 7) +test_that("dist_spec returns error when mixed natural and unnatural parameters are specified", { + expect_error( + LogNormal(meanlog = 5, sd = 1, max = 20), + "Incompatible combination." + ) +}) + +test_that("dist_spec returns error when the wrong number of paramters are given", { + expect_error(LogNormal(sd = 1, max = 20), "must be specified") + expect_error(Gamma(shape = 1, rate = 2, mean = 3), "must be specified") +}) + +test_that("c.dist_spec returns correct output for sum of two distributions", { + dist1 <- LogNormal(meanlog = 5, sdlog = 1, max = 19) + dist2 <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 20) + result <- dist1 + dist2 + expect_equal(result[[1]]$parameters$meanlog, 5) + expect_equal(result[[1]]$parameters$sdlog, 1) + expect_equal(result[[2]]$parameters$shape[[1]]$parameters$mean, 3) + expect_equal(result[[2]]$parameters$shape[[1]]$parameters$sd, 0.5) + expect_equal(result[[2]]$parameters$rate[[1]]$parameters$mean, 2) + expect_equal(result[[2]]$parameters$rate[[1]]$parameters$sd, 0.5) + expect_equal(length(result), 2) +}) + +test_that("collapse returns correct output for sum of two nonparametric distributions", { + dist1 <- NonParametric(c(0.1, 0.2, 0.3, 0.4)) + dist2 <- NonParametric(c(0.1, 0.2, 0.3, 0.4)) + result <- collapse(c(dist1, dist2)) + expect_null(result[[1]]$parameters) + expect_equal(result[[1]]$distribution, "nonparametric") + expect_null(result[[1]]$max) + expect_equal(length(result), 1) expect_equal( - as.vector(round(result$np_pmf, 2)), + round(result[[1]]$pmf, 2), c(0.01, 0.04, 0.10, 0.20, 0.25, 0.24, 0.16) ) }) -test_that("Testing `+.dist_spec` function with tolerance parameter", { +test_that("Testing `apply_tolerance` function applied to a convolution", { # Create distributions - lognormal <- dist_spec( - mean = 1.6, sd = 1, max = 19, distribution = "lognormal" - ) - gamma <- dist_spec( - mean = 3, sd = 2, max = 19, distribution = "gamma" - ) - + dist1 <- LogNormal(meanlog = 1.6, sdlog = 1, max = 19) + dist2 <- Gamma(mean = 3, sd = 2, max = 19) + # Compute combined distribution with default tolerance - combined_default <- EpiNow2:::`+.dist_spec`(lognormal, gamma) + combined <- collapse(discretise(c(dist1, dist2))) # Compute combined distribution with larger tolerance - combined_larger_tolerance <- EpiNow2:::dist_spec_plus( - lognormal, gamma, tolerance = 0.01 - ) + combined_tolerance <- apply_tolerance(combined, tolerance = 0.001) # The length of the combined PMF should be greater with default tolerance - expect_true( - length(combined_default$np_pmf) > length(combined_larger_tolerance$np_pmf) - ) + expect_true(length(combined[[1]]$pmf) > length(combined_tolerance[[1]]$pmf)) # Both should sum to 1 - expect_equal(sum(combined_default$np_pmf), 1) - expect_equal(sum(combined_larger_tolerance$np_pmf), 1) + expect_equal(sum(combined[[1]]$pmf), 1) + expect_equal(sum(combined_tolerance[[1]]$pmf), 1) # The first 5 entries should be within 0.01 of each other expect_equal( - combined_default$np_pmf[1:5], combined_larger_tolerance$np_pmf[1:5], - tolerance = 0.01 - ) - expect_equal( - mean(combined_default), mean(combined_larger_tolerance), tolerance = 0.1 + combined[[1]]$pmf[1:5], combined_tolerance[[1]]$pmf[1:5], tolerance = 0.01 ) + expect_equal(mean(combined), mean(combined_tolerance), tolerance = 0.1) }) - -test_that("mean.dist_spec returns correct output for fixed lognormal distribution", { - lognormal <- dist_spec( - mean = convert_to_logmean(3, 1), sd = convert_to_logsd(3, 1), - max = 19, distribution = "lognormal" - ) - result <- EpiNow2:::mean.dist_spec(lognormal) - expect_equal(result, 2.49, tolerance = 0.01) # here we can see the bias from +test_that("summary functions return correct output for fixed lognormal distribution", { + dist <- discretise(LogNormal(mean = 3, sd = 1, max = 19)) + ## here we can see the bias from # using this kind of discretisation approach + expect_equal(EpiNow2:::mean.dist_spec(dist), 2.49, tolerance = 0.01) + expect_equal(EpiNow2:::sd_dist(dist), 1.09, tolerance = 0.01) + expect_equal(EpiNow2:::max.dist_spec(dist), 19L) }) -test_that("mean.dist_spec returns correct output for uncertain gamma distribution", { - gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 19, distribution = "gamma") - result <- EpiNow2:::mean.dist_spec(gamma) - expect_equal(result, 3) +test_that("summary functions return correct output for uncertain gamma distribution", { + dist <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19) + expect_equal(EpiNow2:::mean.dist_spec(dist, ignore_uncertainty = TRUE), 1.5) + expect_equal(EpiNow2:::max.dist_spec(dist), 19L) }) test_that("mean.dist_spec returns correct output for sum of two distributions", { - lognormal <- dist_spec(mean = 1, sd = 1, max = 19, distribution = "lognormal") - gamma <- dist_spec(mean = 3, sd = 2, max = 19, distribution = "gamma") - result <- EpiNow2:::mean.dist_spec(lognormal + gamma) - expect_equal(result, c(5.84), tolerance = 0.001) + dist1 <- LogNormal(meanlog = 1, sdlog = 1, max = 19) + dist2 <- Gamma(mean = 3, sd = 2, max = 19) + dist <- dist1 + dist2 + expect_equal(EpiNow2:::mean.dist_spec(dist), c(4.48, 3), tolerance = 0.001) + expect_equal(EpiNow2:::sd_dist(dist), c(5.87, 2), tolerance = 0.001) + ## shortened due to tolerance level + expect_equal(EpiNow2:::max.dist_spec(dist), c(19L, 19L)) +}) + +test_that("mean returns NA when applied to uncertain distributions", { + dist <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19) + expect_true(is.na(EpiNow2:::mean.dist_spec(dist))) +}) + +test_that("sd_dist returns NA when applied to uncertain distributions", { + dist <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19) + expect_true(is.na(EpiNow2:::sd_dist(dist))) }) test_that("print.dist_spec correctly prints the parameters of the fixed lognormal", { - lognormal <- dist_spec(mean = 1.5, sd = 0.5, max = 19, distribution = "lognormal") + dist <- discretise(LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19)) - expect_output(print(lognormal), "\\n Fixed distribution with PMF \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]\\n") + expect_output(print(dist), "- nonparametric distribution\\n PMF: \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]") }) test_that("print.dist_spec correctly prints the parameters of the uncertain gamma", { - gamma <- dist_spec( - mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 19, - distribution = "gamma" + gamma <- Gamma( + shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19 ) - expect_output(print(gamma), "\\n Uncertain gamma distribution with \\(untruncated\\) mean 3 \\(SD 0\\.5\\) and SD 2 \\(SD 0\\.5\\)\\n") + expect_output(print(gamma), "- gamma distribution \\(max: 19\\):\\n shape:\\n - normal distribution:\\n mean:\\n 3\\n sd:\\n 0\\.5\\n rate:\\n - normal distribution:\\n mean:\\n 2\\n sd:\\n 0\\.5") }) test_that("print.dist_spec correctly prints the parameters of the uncertain lognormal", { - lognormal_uncertain <- dist_spec(mean = 1.5, sd = 0.5, mean_sd = 0.1, sd_sd = 0.1, max = 19, distribution = "lognormal") - - expect_output(print(lognormal_uncertain), "\\n Uncertain lognormal distribution with \\(untruncated\\) logmean 1\\.5 \\(SD 0\\.1\\) and logSD 0\\.5 \\(SD 0\\.1\\)\\n") -}) - -test_that("print.dist_spec correctly prints the parameters of an empty distribution", { - empty <- dist_spec() + dist <- LogNormal( + meanlog = Normal(1.5, 0.1), sdlog = Normal(0.5, 0.1), max = 19 + ) - expect_output(print(empty), "Empty `dist_spec` distribution.") + expect_output(print(dist), "- lognormal distribution \\(max: 19\\):\\n meanlog:\\n - normal distribution:\\n mean:\\n 1\\.5\\n sd:\\n 0\\.1\\n sdlog:\\n - normal distribution:\\n mean:\\n 0\\.5\\n sd:\\n 0\\.1") }) test_that("print.dist_spec correctly prints the parameters of a combination of distributions", { - lognormal <- dist_spec(mean = 1.5, sd = 0.5, max = 19, distribution = "lognormal") - gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 19, distribution = "gamma") - combined <- lognormal + gamma + dist1 <- LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19) + dist2 <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19) + combined <- dist1 + dist2 - expect_output(print(combined), "Combination of delay distributions:\\n Fixed distribution with PMF \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]\\n Uncertain gamma distribution with \\(untruncated\\) mean 3 \\(SD 0\\.5\\) and SD 2 \\(SD 0\\.5\\)\\n") + expect_output(print(combined), "Composite distribution:\\n- lognormal distribution \\(max: 19\\):\\n meanlog:\\n 1\\.5\\n sdlog:\\n 0\\.5\\n- gamma distribution \\(max: 19\\):\\n shape:\\n - normal distribution:\\n mean:\\n 3\\n sd:\\n 0\\.5\\n rate:\\n - normal distribution:\\n mean:\\n 2\\n sd:\\n 0\\.5") }) test_that("plot.dist_spec returns a ggplot object", { - lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 19, distribution = "lognormal") - plot <- plot(lognormal) + dist <- LogNormal(meanlog = 1.6, sdlog = 0.5, max = 19) + plot <- plot(dist) expect_s3_class(plot, "ggplot") }) test_that("plot.dist_spec correctly plots a single distribution", { - lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 19, distribution = "lognormal") - plot <- plot(lognormal) + dist <- LogNormal(meanlog = 1.6, sdlog = 0.5, max = 19) + plot <- plot(dist) expect_equal(length(plot$layers), 2) expect_equal(length(plot$facet$params$facets), 1) }) test_that("plot.dist_spec correctly plots multiple distributions", { - lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 19, distribution = "lognormal") - gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 19, distribution = "gamma") - combined <- lognormal + gamma + dist1 <- LogNormal(meanlog = 1.6, sdlog = 0.5, max = 19) + dist2 <- Gamma(shape = Normal(3, 5), rate = Normal(1, 2), max = 19) + combined <- dist1 + dist2 plot <- plot(combined) expect_equal(length(plot$layers), 2) expect_equal(length(plot$facet$params$facets), 1) }) test_that("plot.dist_spec correctly plots a combination of fixed distributions", { - lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 19, distribution = "lognormal") - combined <- lognormal + lognormal + dist <- LogNormal(meanlog = 1.6, sdlog = 0.5, max = 19) + combined <- dist + dist plot <- plot(combined) expect_equal(length(plot$layers), 2) expect_equal(length(plot$facet$params$facets), 1) }) -test_that("deprecated arguments are caught", { - expect_deprecated(dist_spec(mean = 1.6, sd = 0.6, max = 19, fixed = TRUE)) +test_that("fix_dist works with composite delay distributions", { + dist1 <- LogNormal(meanlog = Normal(1, 0.1), sdlog = 1, max = 19) + dist2 <- Gamma(mean = 3, sd = 2, max = 19) + dist <- dist1 + dist2 + expect_equal(length(collapse(discretise(fix_dist(dist)))), 1L) +}) + +test_that("composite delay distributions can be disassembled", { + dist1 <- LogNormal(meanlog = Normal(1, 0.1), sdlog = 1, max = 19) + dist2 <- Gamma(mean = 3, sd = 2, max = 19) + dist <- dist1 + dist2 + expect_equal(EpiNow2:::extract_single_dist(dist, 1), dist1) + expect_equal(EpiNow2:::extract_single_dist(dist, 2), dist2) +}) + +test_that("delay distributions can be specified in different ways", { + expect_equal( + unname(as.numeric(LogNormal(mean = 4, sd = 1)[[1]]$parameters)), + c(1.4, 0.25), + tolerance = 0.1 + ) + expect_equal( + round(discretise(LogNormal(mean = 4, sd = 1, max = 10))[[1]]$pmf, 2), + c(0.00, 0.00, 0.14, 0.40, 0.30, 0.11, 0.03, 0.01, 0.00, 0.00, 0.00) + ) + expect_equal( + unname(as.numeric(Gamma(mean = 4, sd = 1)[[1]]$parameters)), + c(16, 4), + tolerance = 0.1 + ) + expect_equal( + round(discretise(Gamma(mean = 4, sd = 1, max = 7))[[1]]$pmf, 2), + c(0.00, 0.01, 0.15, 0.38, 0.31, 0.12, 0.03, 0.00) + ) + expect_equal( + unname(as.numeric( + Gamma( + shape = Normal(16, 2), rate = Normal(4, 1) + )[[1]]$parameters$shape[[1]]$parameters + )), + c(16, 2) + ) + expect_equal( + unname(as.numeric( + Gamma( + shape = Normal(16, 2), rate = Normal(4, 1) + )[[1]]$parameters$rate[[1]]$parameters + )), + c(4, 1) + ) + expect_equal( + unname(as.numeric(Normal(mean = 4, sd = 1)[[1]]$parameters)), c(4, 1) + ) + expect_equal( + round(discretise(Normal(mean = 4, sd = 1, max = 5))[[1]]$pmf, 2), + c(0.00, 0.02, 0.14, 0.35, 0.35, 0.14) + ) + expect_equal(discretise(Fixed(value = 3))[[1]]$pmf, c(0, 0, 0, 1)) + expect_equal(Fixed(value = 3.5)[[1]]$parameters$value, 3.5) + expect_equal( + NonParametric(c(0.1, 0.3, 0.2, 0.4))[[1]]$pmf, + c(0.1, 0.3, 0.2, 0.4) + ) + expect_equal( + round(NonParametric(c(0.1, 0.3, 0.2, 0.1, 0.1))[[1]]$pmf, 2), + c(0.12, 0.37, 0.25, 0.12, 0.12) + ) +}) + +test_that("deprecated functions are deprecated", { + expect_deprecated(dist_spec(params_mean = c(1.6, 0.6), max = 19)) }) diff --git a/tests/testthat/test-epinow.R b/tests/testthat/test-epinow.R index 68810f5ce..74fa057e8 100644 --- a/tests/testthat/test-epinow.R +++ b/tests/testthat/test-epinow.R @@ -1,9 +1,9 @@ skip_on_cran() # set example reporting delay -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), mean_sd = 0.1, - sd = convert_to_logsd(2, 1), sd_sd = 0.1, +reporting_delay <- LogNormal( + meanlog = Normal(0.6, 0.06), + sdlog = Normal(0.5, 0.1), max = 10 ) @@ -20,7 +20,7 @@ test_that("epinow produces expected output when run with default settings", { out <- suppressWarnings(epinow( reported_cases = reported_cases, generation_time = generation_time_opts(example_generation_time), - delays = delay_opts(example_incubation_period + reporting_delay), + delays = delay_opts(c(example_incubation_period, reporting_delay)), stan = stan_opts( samples = 25, warmup = 25, cores = 1, chains = 2, diff --git a/tests/testthat/test-estimate_secondary.R b/tests/testthat/test-estimate_secondary.R index ebe87fdec..12fc95a20 100644 --- a/tests/testthat/test-estimate_secondary.R +++ b/tests/testthat/test-estimate_secondary.R @@ -29,7 +29,7 @@ inc <- estimate_secondary(inc_cases[1:60], # extract posterior variables of interest params <- c( - "meanlog" = "delay_mean[1]", "sdlog" = "delay_sd[1]", + "meanlog" = "delay_params[1]", "sdlog" = "delay_params[2]", "scaling" = "frac_obs[1]" ) @@ -82,10 +82,7 @@ test_that("estimate_secondary successfully returns estimates when passed NA valu cases_na[sample(1:60, 5), secondary := NA] inc_na <- estimate_secondary(cases_na[1:60], delays = delay_opts( - dist_spec( - mean = 1.8, mean_sd = 0, - sd = 0.5, sd_sd = 0, max = 30 - ) + LogNormal(meanlog = 1.8, sdlog = 0.5, max = 30) ), obs = obs_opts(scale = list(mean = 0.2, sd = 0.2), week_effect = FALSE), verbose = FALSE @@ -95,10 +92,7 @@ test_that("estimate_secondary successfully returns estimates when passed NA valu prev_na <- estimate_secondary(prev_cases_na[1:60], secondary = secondary_opts(type = "prevalence"), delays = delay_opts( - dist_spec( - mean = 1.8, mean_sd = 0, - sd = 0.5, sd_sd = 0, max = 30 - ) + LogNormal(mean = 1.8, sd = 0.5, max = 30) ), obs = obs_opts(scale = list(mean = 0.2, sd = 0.2), week_effect = FALSE), verbose = FALSE @@ -117,9 +111,8 @@ test_that("estimate_secondary successfully returns estimates when accumulating t ) inc_weekly <- estimate_secondary(cases_weekly, delays = delay_opts( - dist_spec( - mean = 1.8, mean_sd = 0, - sd = 0.5, sd_sd = 0, max = 30 + LogNormal( + mean = 1.8, sd = 0.5, max = 30 ) ), obs = obs_opts( @@ -197,8 +190,10 @@ test_that("forecast_secondary can return values from simulated data when using }) test_that("estimate_secondary works with weigh_delay_priors = TRUE", { - delays <- dist_spec( - mean = 2.5, mean_sd = 0.5, sd = 0.47, sd_sd = 0.25, max = 30 + delays <- LogNormal( + meanlog = Normal(2.5, 0.5), + sdlog = Normal(0.47, 0.25), + max = 30 ) inc_weigh <- estimate_secondary( inc_cases[1:60], delays = delay_opts(delays), diff --git a/tests/testthat/test-get_dist.R b/tests/testthat/test-get_dist.R index 463c3c964..97cafd400 100644 --- a/tests/testthat/test-get_dist.R +++ b/tests/testthat/test-get_dist.R @@ -1,5 +1,5 @@ test_that("get_dist is deprecated", { - data <- data.table::data.table(mean = 1, mean_sd = 1, sd = 1, sd_sd = 1, source = "test", disease = "test", dist = "gamma") + data <- data.table::data.table(mean = 1, mean_sd = 1, sd = 1, sd_sd = 1, source = "test", disease = "test", dist = "lognormal") expect_deprecated( get_dist(data, disease = "test", source = "test") ) diff --git a/tests/testthat/test-report_cases.R b/tests/testthat/test-report_cases.R index 5378df768..e646ce7a9 100644 --- a/tests/testthat/test-report_cases.R +++ b/tests/testthat/test-report_cases.R @@ -15,7 +15,7 @@ test_that("report_cases can simulate infections forward", { expect_equal(class(reported_cases), "list") expect_equal(class(reported_cases$samples), c("data.table", "data.frame")) expect_equal(class(reported_cases$summarised), c("data.table", "data.frame")) - expect_equal(nrow(reported_cases$summarised), 10) + expect_equal(nrow(reported_cases$summarised), 7) expect_equal(class(reported_cases$summarised$median), "numeric") set.seed(Sys.time()) }) diff --git a/tests/testthat/test-seeding-time.R b/tests/testthat/test-seeding-time.R index d189192ef..746a5dfda 100644 --- a/tests/testthat/test-seeding-time.R +++ b/tests/testthat/test-seeding-time.R @@ -1,10 +1,10 @@ test_that("Seeding times are correctly calculated", { - gt1 <- dist_spec(mean = 5, sd = 1, max = 9) - gt2 <- dist_spec(mean = 10, sd = 2, max = 14) - delay1 <- dist_spec(mean = 5, sd = 1, max = 9) - delay2 <- dist_spec(mean = 7, sd = 3, max = 14) + gt1 <- LogNormal(mean = 5, sd = 1, max = 9) + gt2 <- LogNormal(mean = 10, sd = 2, max = 14) + delay1 <- LogNormal(mean = 5, sd = 1, max = 9) + delay2 <- LogNormal(mean = 7, sd = 3, max = 14) expect_equal( - EpiNow2:::get_seeding_time(delay1, gt1 + gt2), 23L ## 10 + 15 - 1 - 1 + EpiNow2:::get_seeding_time(delay1, gt1 + gt2), 23L ## 9 + 14 ) expect_equal( EpiNow2:::get_seeding_time(delay1 + delay2, gt1), 12L ## 5 + 7 @@ -12,7 +12,7 @@ test_that("Seeding times are correctly calculated", { }) test_that("Short seeding times are rounded up to 1", { - delay <- dist_spec(mean = 0.5, sd = 1, max = 2) - gt <- dist_spec(mean = 1) + delay <- LogNormal(mean = 0.5, sd = 1, max = 2) + gt <- Fixed(value = 1) expect_equal(EpiNow2:::get_seeding_time(delay, gt), 1L) }) diff --git a/tests/testthat/test-stan-infections.R b/tests/testthat/test-stan-infections.R index 15736f996..1b43f7baa 100644 --- a/tests/testthat/test-stan-infections.R +++ b/tests/testthat/test-stan-infections.R @@ -14,11 +14,11 @@ test_that("update_infectiousness works as expected with default settings", { expect_error(update_infectiousness(rep(1, 20), rep(0.1, 5), 5, 10, 10)) }) -pmf <- discretised_pmf(3, 2, 15, 1) +pmf <- discretised_pmf(c(2.25, 0.75), 15, 1) gt_rev_pmf <- get_delay_rev_pmf( 1L, 15L, array(0L), array(1L), array(c(1L, 2L)), array(15L), pmf, - array(c(1L, 16L)), numeric(0), numeric(0), 0L, + array(c(1L, 16L)), numeric(0), 1L, 0L, 1L, 1L, 0L ) diff --git a/tests/testthat/test-stan-secondary.R b/tests/testthat/test-stan-secondary.R index 6f64e9dc1..d53c602aa 100644 --- a/tests/testthat/test-stan-secondary.R +++ b/tests/testthat/test-stan-secondary.R @@ -4,7 +4,7 @@ skip_on_os("windows") # test primary reports and observations reports <- rep(10, 20) obs <- rep(4, 20) -delay_rev_pmf <- reverse_mf(discretised_pmf(log(3), 0.1, 5, 0)) +delay_rev_pmf <- reverse_mf(discretised_pmf(c(log(3), 0.1), 5, 0)) scaled <- reports * 0.1 convolved <- rep(1e-5, 20) + convolve_to_report(scaled, delay_rev_pmf, 0) diff --git a/touchstone/script.R b/touchstone/script.R index 604c8a0fe..4f3a85be1 100644 --- a/touchstone/script.R +++ b/touchstone/script.R @@ -10,7 +10,7 @@ touchstone::benchmark_run( default = { epinow( reported_cases = reported_cases, generation_time = generation_time_opts(fixed_generation_time), - delays = fixed_delays, + delays = delay_opts(fixed_delays), rt = rt_opts(prior = list(mean = 2, sd = 0.2)), stan = stan_opts( cores = 2, samples = 500, chains = 2, @@ -57,7 +57,7 @@ touchstone::benchmark_run( stationary = { epinow( reported_cases = reported_cases, generation_time = generation_time_opts(fixed_generation_time), - delays = fixed_delays, + delays = delay_opts(fixed_delays), rt = rt_opts(prior = list(mean = 2, sd = 0.2), gp_on = "R0"), stan = stan_opts( cores = 2, samples = 500, chains = 2, @@ -73,7 +73,7 @@ touchstone::benchmark_run( random_walk = { epinow( reported_cases = reported_cases, generation_time = generation_time_opts(fixed_generation_time), - delays = fixed_delays, + delays = delay_opts(fixed_delays), rt = rt_opts(prior = list(mean = 2, sd = 0.2), rw = 7), gp = NULL, stan = stan_opts( diff --git a/vignettes/EpiNow2.Rmd b/vignettes/EpiNow2.Rmd index fbf69082e..399ab5273 100644 --- a/vignettes/EpiNow2.Rmd +++ b/vignettes/EpiNow2.Rmd @@ -25,9 +25,9 @@ library(EpiNow2) ### Reporting delays, incubation period and generation time -Distributions can be supplied in two ways. First, by supplying delay data to `estimate_delay()`, where a subsampled bootstrapped lognormal will be fit to account for uncertainty in the observed data without being biased by changes in incidence (see `?EpiNow2::estimate_delay()`). +Distributions can be supplied in two ways. First, one can supplying delay data to `estimate_delay()`, where a subsampled bootstrapped lognormal will be fit to account for uncertainty in the observed data without being biased by changes in incidence (see `?EpiNow2::estimate_delay()`). -Second, by specifying predetermined delays with uncertainty using `dist_spec()`. An arbitrary number of delay distributions are supported in `dist_spec()` with the most common use case likely to be an incubation period followed by a reporting delay (see `?EpiNow2::dist_spec()`). +Second, one can specify predetermined delays with uncertainty using the distribution functions such as `Gamma` or `Lognormal`. An arbitrary number of delay distributions are supported in `dist_spec()` with a common use case being an incubation period followed by a reporting delay. For more information on specifying distributions see (see `?EpiNow2::Distributions`). For example if data on the delay between onset and infection was available we could fit a distribution to it, using `estimate_delay()`, with appropriate uncertainty as follows (note this is a synthetic example), @@ -38,33 +38,21 @@ reporting_delay <- estimate_delay( ) ``` -If data was not available we could instead specify an informed estimate of the likely delay -using `dist_spec()`. To demonstrate, we choose a lognormal distribution with mean 2, standard deviation 1 (both with some uncertainty) and a maximum of 10. *This is just an example and unlikely to apply in any particular use case*. +If data was not available we could instead specify an informed estimate of the likely delay using the distribution functions `Gamma` or `LogNormal`. +To demonstrate, we choose a lognormal distribution with mean 2, standard deviation 1 and a maximum of 10. *This is just an example and unlikely to apply in any particular use case*. ```r -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - mean_sd = 0.1, - sd = convert_to_logsd(2, 1), - sd_sd = 0.1, - max = 10, - distribution = "lognormal" -) -``` - -``` -## Warning: The meaning of the 'max' argument has changed compared to previous versions. It now indicates the maximum of a distribution rather than the length of the probability mass function (including 0) that it represented previously. To replicate previous behaviour reduce max by 1. -## This warning is displayed once every 8 hours. -``` - -```r +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) reporting_delay ``` ``` -## -## Uncertain lognormal distribution with (untruncated) logmean 0.58 (SD 0.1) and logSD 0.47 (SD 0.1) +## - lognormal distribution (max: 10): +## meanlog: +## 0.58 +## sdlog: +## 0.47 ``` For the rest of this vignette, we will use inbuilt example literature estimates for the incubation period and generation time of Covid-19 (see [here](https://github.com/epiforecasts/EpiNow2/tree/main/data-raw) for the code that generates these estimates). *These distributions are unlikely to be applicable for your use case. We strongly recommend investigating what might be the best distributions to use in any given use case.* @@ -75,8 +63,19 @@ example_generation_time ``` ``` -## -## Uncertain gamma distribution with (untruncated) mean 3.6 (SD 0.71) and SD 3.1 (SD 0.77) +## - gamma distribution (max: 14): +## shape: +## - normal distribution: +## mean: +## 1.4 +## sd: +## 0.48 +## rate: +## - normal distribution: +## mean: +## 0.38 +## sd: +## 0.25 ``` ```r @@ -84,8 +83,19 @@ example_incubation_period ``` ``` -## -## Uncertain lognormal distribution with (untruncated) logmean 1.6 (SD 0.064) and logSD 0.42 (SD 0.069) +## - lognormal distribution (max: 14): +## meanlog: +## - normal distribution: +## mean: +## 1.6 +## sd: +## 0.064 +## sdlog: +## - normal distribution: +## mean: +## 0.42 +## sd: +## 0.069 ``` Now, to the functions. @@ -104,6 +114,7 @@ head(reported_cases) ``` ## date confirm +## ## 1: 2020-02-22 14 ## 2: 2020-02-23 62 ## 3: 2020-02-24 53 @@ -127,7 +138,7 @@ estimates <- epinow( ``` ``` -## DEBUG [2023-11-24 16:30:24] epinow: Running in exact mode for 2000 samples (across 4 chains each with a warm up of 250 iterations each) and 79 time steps of which 7 are a forecast +## DEBUG [2024-03-06 10:56:48] epinow: Running in exact mode for 2000 samples (across 4 chains each with a warm up of 250 iterations each) and 81 time steps of which 7 are a forecast ``` ```r @@ -135,8 +146,9 @@ names(estimates) ``` ``` -## [1] "estimates" "estimated_reported_cases" "summary" -## [4] "plots" "timing" +## [1] "estimates" "estimated_reported_cases" +## [3] "summary" "plots" +## [5] "timing" ``` Both summary measures and posterior samples are returned for all parameters in an easily explored format which can be accessed using `summary`. The default is to return a summary table of estimates for key parameters at the latest date partially supported by data. @@ -150,11 +162,11 @@ knitr::kable(summary(estimates)) |measure |estimate | |:-------------------------------------|:----------------------| -|New confirmed cases by infection date |2274 (1099 -- 4314) | +|New confirmed cases by infection date |2275 (1119 -- 4289) | |Expected change in daily cases |Likely decreasing | -|Effective reproduction no. |0.88 (0.6 -- 1.2) | -|Rate of growth |-0.027 (-0.1 -- 0.037) | -|Doubling/halving time (days) |-25 (19 -- -6.8) | +|Effective reproduction no. |0.88 (0.59 -- 1.2) | +|Rate of growth |-0.026 (-0.1 -- 0.034) | +|Doubling/halving time (days) |-26 (20 -- -6.8) | Summarised parameter estimates can also easily be returned, either filtered for a single parameter or for all parameters. @@ -164,20 +176,22 @@ head(summary(estimates, type = "parameters", params = "R")) ``` ``` -## date variable strat type median mean sd lower_90 lower_50 lower_20 -## 1: 2020-02-22 R estimate 2.221079 2.224645 0.14975914 1.982998 2.120541 2.184087 -## 2: 2020-02-23 R estimate 2.188992 2.189990 0.12321715 1.987626 2.105233 2.156689 -## 3: 2020-02-24 R estimate 2.154198 2.153534 0.10184039 1.984744 2.083930 2.129261 -## 4: 2020-02-25 R estimate 2.115460 2.115351 0.08536356 1.976180 2.057553 2.093551 -## 5: 2020-02-26 R estimate 2.076472 2.075590 0.07338724 1.955627 2.025919 2.055536 -## 6: 2020-02-27 R estimate 2.033228 2.034460 0.06529358 1.925234 1.990964 2.017267 -## upper_20 upper_50 upper_90 -## 1: 2.256138 2.321166 2.481580 -## 2: 2.219594 2.269469 2.400337 -## 3: 2.179742 2.222020 2.331207 -## 4: 2.139824 2.173188 2.256432 -## 5: 2.095006 2.125832 2.198023 -## 6: 2.051111 2.079405 2.142454 +## date variable strat type median mean sd lower_90 +## +## 1: 2020-02-22 R estimate 2.235313 2.242778 0.15785751 1.994122 +## 2: 2020-02-23 R estimate 2.204258 2.206780 0.13441775 1.994963 +## 3: 2020-02-24 R estimate 2.165179 2.168790 0.11730902 1.983866 +## 4: 2020-02-25 R estimate 2.123195 2.128927 0.10599589 1.958182 +## 5: 2020-02-26 R estimate 2.081865 2.087395 0.09931082 1.931449 +## 6: 2020-02-27 R estimate 2.038602 2.044461 0.09574420 1.896977 +## lower_50 lower_20 upper_20 upper_50 upper_90 +## +## 1: 2.133553 2.196041 2.273706 2.343048 2.510622 +## 2: 2.117524 2.168279 2.232611 2.295411 2.431460 +## 3: 2.092001 2.134802 2.192621 2.247227 2.364333 +## 4: 2.057449 2.099115 2.151714 2.200477 2.308683 +## 5: 2.018154 2.055255 2.107715 2.153086 2.256241 +## 6: 1.978879 2.014504 2.063162 2.107575 2.208615 ``` Reported cases are returned in a separate data frame in order to streamline the reporting of forecasts and for model evaluation. @@ -188,13 +202,22 @@ head(summary(estimates, output = "estimated_reported_cases")) ``` ``` -## date type median mean sd lower_90 lower_50 lower_20 upper_20 upper_50 upper_90 -## 1: 2020-02-22 gp_rt 67 68.2710 18.40907 41.95 55 62 71 79 101 -## 2: 2020-02-23 gp_rt 77 79.2985 21.72877 47.00 64 73 83 92 117 -## 3: 2020-02-24 gp_rt 77 78.9005 20.98294 49.00 64 72 82 91 116 -## 4: 2020-02-25 gp_rt 74 75.0540 21.44484 43.00 60 68 78 87 114 -## 5: 2020-02-26 gp_rt 78 80.1645 21.46518 48.00 65 73 84 93 118 -## 6: 2020-02-27 gp_rt 110 113.6910 30.39336 68.00 92 104 118 134 168 +## date type median mean sd lower_90 lower_50 lower_20 +## +## 1: 2020-02-22 gp_rt 69 70.3425 18.36240 43.95 58 64.0 +## 2: 2020-02-23 gp_rt 77 79.1820 20.80436 48.00 64 73.0 +## 3: 2020-02-24 gp_rt 76 77.5690 20.52722 48.00 63 71.0 +## 4: 2020-02-25 gp_rt 73 74.9525 19.90978 46.95 61 68.0 +## 5: 2020-02-26 gp_rt 77 79.1355 20.81427 50.00 64 72.0 +## 6: 2020-02-27 gp_rt 111 112.9385 28.65963 70.00 93 103.6 +## upper_20 upper_50 upper_90 +## +## 1: 73 81 102.00 +## 2: 83 92 116.00 +## 3: 81 90 113.05 +## 4: 78 87 110.00 +## 5: 82 92 116.00 +## 6: 118 130 165.00 ``` A range of plots are returned (with the single summary plot shown below). These plots can also be generated using the following `plot` method. @@ -225,6 +248,7 @@ head(reported_cases) ``` ## date confirm region +## ## 1: 2020-02-22 14 testland ## 2: 2020-02-23 62 testland ## 3: 2020-02-24 53 testland @@ -248,27 +272,30 @@ estimates <- regional_epinow( ``` ``` -## INFO [2023-11-24 16:31:54] Producing following optional outputs: regions, summary, samples, plots, latest -## INFO [2023-11-24 16:31:54] Reporting estimates using data up to: 2020-04-21 -## INFO [2023-11-24 16:31:54] No target directory specified so returning output -## INFO [2023-11-24 16:31:54] Producing estimates for: testland, realland -## INFO [2023-11-24 16:31:54] Regions excluded: none -## DEBUG [2023-11-24 16:31:54] testland: Running in exact mode for 1000 samples (across 4 chains each with a warm up of 250 iterations each) and 79 time steps of which 7 are a forecast -## WARN [2023-11-24 16:32:07] testland (chain: 1): Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. +## INFO [2024-03-06 10:57:48] Producing following optional outputs: regions, summary, samples, plots, latest +## INFO [2024-03-06 10:57:48] Reporting estimates using data up to: 2020-04-21 +## INFO [2024-03-06 10:57:48] No target directory specified so returning output +## INFO [2024-03-06 10:57:48] Producing estimates for: testland, realland +## INFO [2024-03-06 10:57:48] Regions excluded: none +## DEBUG [2024-03-06 10:57:48] testland: Running in exact mode for 1000 samples (across 4 chains each with a warm up of 250 iterations each) and 81 time steps of which 7 are a forecast +## WARN [2024-03-06 10:57:59] testland (chain: 1): Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. ## Running the chains for more iterations may help. See ## https://mc-stan.org/misc/warnings.html#bulk-ess - -## INFO [2023-11-24 16:32:08] Completed estimates for: testland -## DEBUG [2023-11-24 16:32:08] realland: Running in exact mode for 1000 samples (across 4 chains each with a warm up of 250 iterations each) and 79 time steps of which 7 are a forecast -## WARN [2023-11-24 16:32:21] realland (chain: 1): Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. +## WARN [2024-03-06 10:58:00] testland (chain: 1): Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable. +## Running the chains for more iterations may help. See +## https://mc-stan.org/misc/warnings.html#tail-ess - +## INFO [2024-03-06 10:58:00] Completed estimates for: testland +## DEBUG [2024-03-06 10:58:00] realland: Running in exact mode for 1000 samples (across 4 chains each with a warm up of 250 iterations each) and 81 time steps of which 7 are a forecast +## WARN [2024-03-06 10:58:10] realland (chain: 1): Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. ## Running the chains for more iterations may help. See ## https://mc-stan.org/misc/warnings.html#bulk-ess - -## INFO [2023-11-24 16:32:21] Completed estimates for: realland -## INFO [2023-11-24 16:32:21] Completed regional estimates -## INFO [2023-11-24 16:32:21] Regions with estimates: 2 -## INFO [2023-11-24 16:32:21] Regions with runtime errors: 0 -## INFO [2023-11-24 16:32:21] Producing summary -## INFO [2023-11-24 16:32:21] No summary directory specified so returning summary output -## INFO [2023-11-24 16:32:22] No target directory specified so returning timings +## INFO [2024-03-06 10:58:11] Completed estimates for: realland +## INFO [2024-03-06 10:58:11] Completed regional estimates +## INFO [2024-03-06 10:58:11] Regions with estimates: 2 +## INFO [2024-03-06 10:58:11] Regions with runtime errors: 0 +## INFO [2024-03-06 10:58:11] Producing summary +## INFO [2024-03-06 10:58:11] No summary directory specified so returning summary output +## INFO [2024-03-06 10:58:11] No target directory specified so returning timings ``` Results from each region are stored in a `regional` list with across region summary measures and plots stored in a `summary` list. All results can be set to be internally saved by setting the `target_folder` and `summary_dir` arguments. Each region can be estimated in parallel using the `{future}` package (when in most scenarios `cores` should be set to 1). For routine use each MCMC chain can also be run in parallel (with `future` = TRUE) with a time out (`max_execution_time`) allowing for partial results to be returned if a subset of chains is running longer than expected. See the documentation for the `{future}` package for details on nested futures. @@ -282,10 +309,10 @@ knitr::kable(estimates$summary$summarised_results$table) -|Region |New confirmed cases by infection date |Expected change in daily cases |Effective reproduction no. |Rate of growth |Doubling/halving time (days) | -|:--------|:-------------------------------------|:------------------------------|:--------------------------|:----------------------|:----------------------------| -|realland |2137 (1071 -- 4099) |Likely decreasing |0.85 (0.6 -- 1.2) |-0.034 (-0.1 -- 0.034) |-20 (20 -- -6.9) | -|testland |2219 (1155 -- 4015) |Likely decreasing |0.87 (0.63 -- 1.1) |-0.03 (-0.093 -- 0.03) |-23 (23 -- -7.4) | +|Region |New confirmed cases by infection date |Expected change in daily cases |Effective reproduction no. |Rate of growth |Doubling/halving time (days) | +|:--------|:-------------------------------------|:------------------------------|:--------------------------|:------------------------|:----------------------------| +|realland |2160 (1134 -- 4105) |Likely decreasing |0.86 (0.62 -- 1.1) |-0.032 (-0.093 -- 0.03) |-22 (23 -- -7.5) | +|testland |2137 (1110 -- 4238) |Likely decreasing |0.86 (0.62 -- 1.2) |-0.032 (-0.097 -- 0.032) |-22 (22 -- -7.2) | A range of plots are again returned (with the single summary plot shown below). diff --git a/vignettes/EpiNow2.Rmd.orig b/vignettes/EpiNow2.Rmd.orig index 26a8ff8c1..e92080dea 100644 --- a/vignettes/EpiNow2.Rmd.orig +++ b/vignettes/EpiNow2.Rmd.orig @@ -24,9 +24,9 @@ library(EpiNow2) ### Reporting delays, incubation period and generation time -Distributions can be supplied in two ways. First, by supplying delay data to `estimate_delay()`, where a subsampled bootstrapped lognormal will be fit to account for uncertainty in the observed data without being biased by changes in incidence (see `?EpiNow2::estimate_delay()`). +Distributions can be supplied in two ways. First, one can supplying delay data to `estimate_delay()`, where a subsampled bootstrapped lognormal will be fit to account for uncertainty in the observed data without being biased by changes in incidence (see `?EpiNow2::estimate_delay()`). -Second, by specifying predetermined delays with uncertainty using `dist_spec()`. An arbitrary number of delay distributions are supported in `dist_spec()` with the most common use case likely to be an incubation period followed by a reporting delay (see `?EpiNow2::dist_spec()`). +Second, one can specify predetermined delays with uncertainty using the distribution functions such as `Gamma` or `Lognormal`. An arbitrary number of delay distributions are supported in `dist_spec()` with a common use case being an incubation period followed by a reporting delay. For more information on specifying distributions see (see `?EpiNow2::Distributions`). For example if data on the delay between onset and infection was available we could fit a distribution to it, using `estimate_delay()`, with appropriate uncertainty as follows (note this is a synthetic example), ```{r, eval = FALSE} @@ -36,18 +36,11 @@ reporting_delay <- estimate_delay( ) ``` -If data was not available we could instead specify an informed estimate of the likely delay -using `dist_spec()`. To demonstrate, we choose a lognormal distribution with mean 2, standard deviation 1 (both with some uncertainty) and a maximum of 10. *This is just an example and unlikely to apply in any particular use case*. +If data was not available we could instead specify an informed estimate of the likely delay using the distribution functions `Gamma` or `LogNormal`. +To demonstrate, we choose a lognormal distribution with mean 2, standard deviation 1 and a maximum of 10. *This is just an example and unlikely to apply in any particular use case*. ```{r} -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), - mean_sd = 0.1, - sd = convert_to_logsd(2, 1), - sd_sd = 0.1, - max = 10, - distribution = "lognormal" -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) reporting_delay ``` @@ -150,4 +143,4 @@ A range of plots are again returned (with the single summary plot shown below). ```{r, dpi = 330, fig.width = 12, fig.height = 12, message = FALSE, warning = FALSE} estimates$summary$summary_plot -``` \ No newline at end of file +``` diff --git a/vignettes/epinow.Rmd b/vignettes/epinow.Rmd index 2ad1c5bb0..31a3e3c04 100644 --- a/vignettes/epinow.Rmd +++ b/vignettes/epinow.Rmd @@ -30,10 +30,7 @@ This should be replaced with parameters relevant to the system that is being stu library("EpiNow2") options(mc.cores = 4) reported_cases <- example_confirmed[1:60] -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), mean_sd = 0, - sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) delay <- example_incubation_period + reporting_delay rt_prior <- list(mean = 2, sd = 0.1) ``` @@ -49,13 +46,13 @@ res <- epinow(reported_cases, target_folder = "results" ) #> Logging threshold set at INFO for the EpiNow2 logger -#> Writing EpiNow2 logs to the console and: /tmp/RtmprzMMcb/regional-epinow/2020-04-21.log +#> Writing EpiNow2 logs to the console and: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/regional-epinow/2020-04-21.log #> Logging threshold set at INFO for the EpiNow2.epinow logger -#> Writing EpiNow2.epinow logs to the console and: /tmp/RtmprzMMcb/epinow/2020-04-21.log -#> WARN [2023-10-21 09:32:12] epinow: There were 17 divergent transitions after warmup. See +#> Writing EpiNow2.epinow logs to the console and: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/epinow/2020-04-21.log +#> WARN [2024-03-06 10:52:09] epinow: There were 9 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. - -#> WARN [2023-10-21 09:32:12] epinow: Examine the pairs() plot to diagnose sampling problems +#> WARN [2024-03-06 10:52:09] epinow: Examine the pairs() plot to diagnose sampling problems #> - res$plots$R #> NULL @@ -89,41 +86,44 @@ region_rt <- regional_epinow( delays = delay_opts(delay), rt = rt_opts(prior = rt_prior), ) -#> INFO [2023-10-21 09:32:19] Producing following optional outputs: regions, summary, samples, plots, latest +#> INFO [2024-03-06 10:52:13] Producing following optional outputs: regions, summary, samples, plots, latest #> Logging threshold set at INFO for the EpiNow2 logger -#> Writing EpiNow2 logs to the console and: /tmp/RtmprzMMcb/regional-epinow/2020-04-21.log +#> Writing EpiNow2 logs to the console and: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/regional-epinow/2020-04-21.log #> Logging threshold set at INFO for the EpiNow2.epinow logger -#> Writing EpiNow2.epinow logs to: /tmp/RtmprzMMcb/epinow/2020-04-21.log -#> INFO [2023-10-21 09:32:19] Reporting estimates using data up to: 2020-04-21 -#> INFO [2023-10-21 09:32:19] No target directory specified so returning output -#> INFO [2023-10-21 09:32:19] Producing estimates for: testland, realland -#> INFO [2023-10-21 09:32:19] Regions excluded: none -#> INFO [2023-10-21 09:33:56] Completed estimates for: testland -#> INFO [2023-10-21 09:35:26] Completed estimates for: realland -#> INFO [2023-10-21 09:35:26] Completed regional estimates -#> INFO [2023-10-21 09:35:26] Regions with estimates: 2 -#> INFO [2023-10-21 09:35:26] Regions with runtime errors: 0 -#> INFO [2023-10-21 09:35:26] Producing summary -#> INFO [2023-10-21 09:35:26] No summary directory specified so returning summary output -#> INFO [2023-10-21 09:35:26] No target directory specified so returning timings +#> Writing EpiNow2.epinow logs to: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/epinow/2020-04-21.log +#> INFO [2024-03-06 10:52:13] Reporting estimates using data up to: 2020-04-21 +#> INFO [2024-03-06 10:52:13] No target directory specified so returning output +#> INFO [2024-03-06 10:52:13] Producing estimates for: testland, realland +#> INFO [2024-03-06 10:52:13] Regions excluded: none +#> INFO [2024-03-06 10:52:48] Completed estimates for: testland +#> INFO [2024-03-06 10:53:17] Completed estimates for: realland +#> INFO [2024-03-06 10:53:17] Completed regional estimates +#> INFO [2024-03-06 10:53:17] Regions with estimates: 2 +#> INFO [2024-03-06 10:53:17] Regions with runtime errors: 0 +#> INFO [2024-03-06 10:53:17] Producing summary +#> INFO [2024-03-06 10:53:17] No summary directory specified so returning summary output +#> INFO [2024-03-06 10:53:17] No target directory specified so returning timings ## summary region_rt$summary$summarised_results$table #> Region New confirmed cases by infection date -#> 1: realland 2119 (876 -- 4309) -#> 2: testland 2082 (823 -- 4219) +#> +#> 1: realland 2254 (1153 -- 4243) +#> 2: testland 2261 (1059 -- 4347) #> Expected change in daily cases Effective reproduction no. -#> 1: Likely decreasing 0.89 (0.61 -- 1.2) +#> +#> 1: Likely decreasing 0.87 (0.6 -- 1.2) #> 2: Likely decreasing 0.88 (0.6 -- 1.2) -#> Rate of growth Doubling/halving time (days) -#> 1: -0.033 (-0.14 -- 0.041) -21 (17 -- -5.1) -#> 2: -0.035 (-0.14 -- 0.04) -20 (17 -- -5) +#> Rate of growth Doubling/halving time (days) +#> +#> 1: -0.028 (-0.1 -- 0.034) -24 (20 -- -6.9) +#> 2: -0.027 (-0.1 -- 0.037) -25 (19 -- -6.8) ## plot region_rt$summary$plots$R ``` ![plot of chunk regional_epinow](figure/regional_epinow-1.png) -If instead, we wanted to use the Gaussian Process for `testland` and a weekly random walk for `realland` we could specify these separately using the `opts_list()` from the package and `modifyList()` from `R`. +If instead, we wanted to use the Gaussian Process for `testland` and a weekly random walk for `realland` we could specify these separately using the `opts_list()` function from the package and `modifyList()` from `R`. ```r @@ -136,34 +136,37 @@ region_separate_rt <- regional_epinow( delays = delay_opts(delay), rt = rt, gp = gp, ) -#> INFO [2023-10-21 09:35:27] Producing following optional outputs: regions, summary, samples, plots, latest +#> INFO [2024-03-06 10:53:17] Producing following optional outputs: regions, summary, samples, plots, latest #> Logging threshold set at INFO for the EpiNow2 logger -#> Writing EpiNow2 logs to the console and: /tmp/RtmprzMMcb/regional-epinow/2020-04-21.log +#> Writing EpiNow2 logs to the console and: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/regional-epinow/2020-04-21.log #> Logging threshold set at INFO for the EpiNow2.epinow logger -#> Writing EpiNow2.epinow logs to: /tmp/RtmprzMMcb/epinow/2020-04-21.log -#> INFO [2023-10-21 09:35:27] Reporting estimates using data up to: 2020-04-21 -#> INFO [2023-10-21 09:35:27] No target directory specified so returning output -#> INFO [2023-10-21 09:35:27] Producing estimates for: testland, realland -#> INFO [2023-10-21 09:35:27] Regions excluded: none -#> INFO [2023-10-21 09:37:00] Completed estimates for: testland -#> INFO [2023-10-21 09:37:36] Completed estimates for: realland -#> INFO [2023-10-21 09:37:36] Completed regional estimates -#> INFO [2023-10-21 09:37:36] Regions with estimates: 2 -#> INFO [2023-10-21 09:37:36] Regions with runtime errors: 0 -#> INFO [2023-10-21 09:37:36] Producing summary -#> INFO [2023-10-21 09:37:36] No summary directory specified so returning summary output -#> INFO [2023-10-21 09:37:36] No target directory specified so returning timings +#> Writing EpiNow2.epinow logs to: /var/folders/n9/h_419gjj2mg3d208nplgvbg40000gp/T//RtmpCLEtE0/epinow/2020-04-21.log +#> INFO [2024-03-06 10:53:17] Reporting estimates using data up to: 2020-04-21 +#> INFO [2024-03-06 10:53:17] No target directory specified so returning output +#> INFO [2024-03-06 10:53:17] Producing estimates for: testland, realland +#> INFO [2024-03-06 10:53:17] Regions excluded: none +#> INFO [2024-03-06 10:53:50] Completed estimates for: testland +#> INFO [2024-03-06 10:53:59] Completed estimates for: realland +#> INFO [2024-03-06 10:53:59] Completed regional estimates +#> INFO [2024-03-06 10:53:59] Regions with estimates: 2 +#> INFO [2024-03-06 10:53:59] Regions with runtime errors: 0 +#> INFO [2024-03-06 10:53:59] Producing summary +#> INFO [2024-03-06 10:53:59] No summary directory specified so returning summary output +#> INFO [2024-03-06 10:54:00] No target directory specified so returning timings ## summary region_separate_rt$summary$summarised_results$table #> Region New confirmed cases by infection date -#> 1: realland 2013 (953 -- 4182) -#> 2: testland 2052 (855 -- 4277) +#> +#> 1: realland 2122 (1144 -- 3976) +#> 2: testland 2177 (992 -- 4216) #> Expected change in daily cases Effective reproduction no. -#> 1: Likely decreasing 0.88 (0.66 -- 1.2) -#> 2: Likely decreasing 0.88 (0.6 -- 1.2) -#> Rate of growth Doubling/halving time (days) -#> 1: -0.035 (-0.12 -- 0.04) -20 (17 -- -6) -#> 2: -0.036 (-0.14 -- 0.045) -19 (16 -- -5) +#> +#> 1: Likely decreasing 0.85 (0.61 -- 1.2) +#> 2: Likely decreasing 0.86 (0.54 -- 1.2) +#> Rate of growth Doubling/halving time (days) +#> +#> 1: -0.033 (-0.094 -- 0.031) -21 (22 -- -7.3) +#> 2: -0.031 (-0.11 -- 0.034) -22 (20 -- -6.1) ## plot region_separate_rt$summary$plots$R ``` diff --git a/vignettes/epinow.Rmd.orig b/vignettes/epinow.Rmd.orig index ad41f91d1..b0b2fb782 100644 --- a/vignettes/epinow.Rmd.orig +++ b/vignettes/epinow.Rmd.orig @@ -36,10 +36,7 @@ This should be replaced with parameters relevant to the system that is being stu library("EpiNow2") options(mc.cores = 4) reported_cases <- example_confirmed[1:60] -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), mean_sd = 0, - sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) delay <- example_incubation_period + reporting_delay rt_prior <- list(mean = 2, sd = 0.1) ``` diff --git a/vignettes/estimate_infections_options.Rmd b/vignettes/estimate_infections_options.Rmd index 3c5f6cc59..5a76749ac 100644 --- a/vignettes/estimate_infections_options.Rmd +++ b/vignettes/estimate_infections_options.Rmd @@ -26,10 +26,15 @@ We first load the _EpiNow2_ package and also the _rstan_ package that we will us ```r library("EpiNow2") +#> +#> Attaching package: 'EpiNow2' +#> The following object is masked from 'package:stats': +#> +#> Gamma library("rstan") #> Loading required package: StanHeaders #> -#> rstan version 2.33.1.9000 (Stan version 2.33.0) +#> rstan version 2.32.6 (Stan version 2.32.2) #> For execution on a local, multicore CPU with excess RAM we recommend calling #> options(mc.cores = parallel::detectCores()). #> To avoid recompilation of unchanged Stan programs, we recommend calling @@ -74,8 +79,19 @@ Delays reflect the time that passes between infection and reporting, if these ex ```r example_incubation_period -#> -#> Uncertain lognormal distribution with (untruncated) logmean 1.6 (SD 0.064) and logSD 0.42 (SD 0.069) +#> - lognormal distribution (max: 14): +#> meanlog: +#> - normal distribution: +#> mean: +#> 1.6 +#> sd: +#> 0.064 +#> sdlog: +#> - normal distribution: +#> mean: +#> 0.42 +#> sd: +#> 0.069 ``` For the reporting delay, we use a lognormal distribution with mean of 2 days and standard deviation of 1 day. @@ -83,15 +99,13 @@ Note that the mean and standard deviation must be converted to the log scale, wh ```r -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), mean_sd = 0, - sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 -) -#> Warning: The meaning of the 'max' argument has changed compared to previous versions. It now indicates the maximum of a distribution rather than the length of the probability mass function (including 0) that it represented previously. To replicate previous behaviour reduce max by 1. -#> This warning is displayed once every 8 hours. +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) reporting_delay -#> -#> Fixed distribution with PMF [0.11 0.48 0.27 0.093 0.029 0.0096 0.0033 0.0012 0.00045 0.00018 7.4e-05] +#> - lognormal distribution (max: 10): +#> meanlog: +#> 0.58 +#> sdlog: +#> 0.47 ``` _EpiNow2_ provides a feature that allows us to combine these delays into one by summing them up @@ -100,10 +114,25 @@ _EpiNow2_ provides a feature that allows us to combine these delays into one by ```r delay <- example_incubation_period + reporting_delay delay -#> -#> Combination of delay distributions: -#> Uncertain lognormal distribution with (untruncated) logmean 1.6 (SD 0.064) and logSD 0.42 (SD 0.069) -#> Fixed distribution with PMF [0.11 0.48 0.27 0.093 0.029 0.0096 0.0033 0.0012 0.00045 0.00018 7.4e-05] +#> Composite distribution: +#> - lognormal distribution (max: 14): +#> meanlog: +#> - normal distribution: +#> mean: +#> 1.6 +#> sd: +#> 0.064 +#> sdlog: +#> - normal distribution: +#> mean: +#> 0.42 +#> sd: +#> 0.069 +#> - lognormal distribution (max: 10): +#> meanlog: +#> 0.58 +#> sdlog: +#> 0.47 ``` ## Generation time @@ -113,8 +142,19 @@ If we want to estimate the reproduction number we need to provide a distribution ```r example_generation_time -#> -#> Uncertain gamma distribution with (untruncated) mean 3.6 (SD 0.71) and SD 3.1 (SD 0.77) +#> - gamma distribution (max: 14): +#> shape: +#> - normal distribution: +#> mean: +#> 1.4 +#> sd: +#> 0.48 +#> rate: +#> - normal distribution: +#> mean: +#> 0.38 +#> sd: +#> 0.25 ``` ## Initial reproduction number @@ -142,25 +182,26 @@ def <- estimate_infections(reported_cases, delays = delay_opts(delay), rt = rt_opts(prior = rt_prior) ) -#> Warning: There were 7 divergent transitions after warmup. See +#> Warning: There were 4 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems # summarise results summary(def) -#> measure estimate -#> 1: New confirmed cases by infection date 2326 (1160 -- 4361) -#> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.89 (0.62 -- 1.2) -#> 4: Rate of growth -0.024 (-0.094 -- 0.036) -#> 5: Doubling/halving time (days) -29 (19 -- -7.4) +#> measure estimate +#> +#> 1: New confirmed cases by infection date 2271 (1111 -- 4285) +#> 2: Expected change in daily cases Likely decreasing +#> 3: Effective reproduction no. 0.88 (0.6 -- 1.2) +#> 4: Rate of growth -0.027 (-0.1 -- 0.037) +#> 5: Doubling/halving time (days) -25 (19 -- -6.9) # elapsed time (in seconds) get_elapsed_time(def$fit) #> warmup sample -#> chain:1 33.573 24.459 -#> chain:2 27.082 34.938 -#> chain:3 34.183 24.497 -#> chain:4 30.027 31.835 +#> chain:1 11.075 14.604 +#> chain:2 13.366 9.757 +#> chain:3 13.132 13.092 +#> chain:4 13.467 17.969 # summary plot plot(def) ``` @@ -179,25 +220,26 @@ agp <- estimate_infections(reported_cases, rt = rt_opts(prior = rt_prior), gp = gp_opts(basis_prop = 0.1) ) -#> Warning: There were 8 divergent transitions after warmup. See +#> Warning: There were 4 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems # summarise results summary(agp) #> measure estimate -#> 1: New confirmed cases by infection date 2343 (1188 -- 4363) +#> +#> 1: New confirmed cases by infection date 2306 (1175 -- 4321) #> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.89 (0.63 -- 1.2) -#> 4: Rate of growth -0.024 (-0.091 -- 0.039) -#> 5: Doubling/halving time (days) -29 (18 -- -7.6) +#> 3: Effective reproduction no. 0.88 (0.63 -- 1.2) +#> 4: Rate of growth -0.026 (-0.093 -- 0.034) +#> 5: Doubling/halving time (days) -26 (20 -- -7.5) # elapsed time (in seconds) get_elapsed_time(agp$fit) #> warmup sample -#> chain:1 23.392 23.266 -#> chain:2 18.817 23.019 -#> chain:3 19.906 23.454 -#> chain:4 27.043 23.983 +#> chain:1 7.821 9.160 +#> chain:2 8.874 11.466 +#> chain:3 10.379 14.372 +#> chain:4 6.943 12.677 # summary plot plot(agp) ``` @@ -220,25 +262,26 @@ dep <- estimate_infections(reported_cases, pop = 1000000, future = "latest" ) ) -#> Warning: There were 14 divergent transitions after warmup. See +#> Warning: There were 12 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems # summarise results summary(dep) #> measure estimate -#> 1: New confirmed cases by infection date 2275 (1141 -- 4324) +#> +#> 1: New confirmed cases by infection date 2329 (1146 -- 4415) #> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.88 (0.62 -- 1.2) -#> 4: Rate of growth -0.027 (-0.096 -- 0.036) -#> 5: Doubling/halving time (days) -26 (19 -- -7.2) +#> 3: Effective reproduction no. 0.89 (0.62 -- 1.2) +#> 4: Rate of growth -0.026 (-0.094 -- 0.037) +#> 5: Doubling/halving time (days) -27 (19 -- -7.4) # elapsed time (in seconds) get_elapsed_time(dep$fit) #> warmup sample -#> chain:1 30.522 24.801 -#> chain:2 29.964 24.931 -#> chain:3 33.582 25.698 -#> chain:4 27.050 25.586 +#> chain:1 13.161 9.823 +#> chain:2 12.282 14.130 +#> chain:3 14.255 18.819 +#> chain:4 10.584 9.753 # summary plot plot(dep) ``` @@ -252,11 +295,30 @@ Here, instead of doing so we assume that we know about truncation with mean of 1 ```r -trunc_dist <- dist_spec( - mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1, - sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1, +trunc_dist <- LogNormal( + mean = Normal(0.5, 0.1), + sd = Normal(0.5, 0.1), max = 3 ) +#> Warning in new_dist_spec(params, "lognormal"): Uncertain lognormal distribution +#> specified in terms of parameters that are not the "natural" parameters of the +#> distribution (meanlog, sdlog). Converting using a crude and very approximate +#> method that is likely to produce biased results. If possible, it is preferable +#> to specify the distribution directly in terms of the natural parameters. +trunc_dist +#> - lognormal distribution (max: 3): +#> meanlog: +#> - normal distribution: +#> mean: +#> -1 +#> sd: +#> 0.14 +#> sdlog: +#> - normal distribution: +#> mean: +#> 0.83 +#> sd: +#> 0.13 ``` We can then use this in the `esimtate_infections()` function using the `truncation` option. @@ -269,25 +331,26 @@ trunc <- estimate_infections(reported_cases, truncation = trunc_opts(trunc_dist), rt = rt_opts(prior = rt_prior) ) -#> Warning: There were 6 divergent transitions after warmup. See +#> Warning: There were 11 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems # summarise results summary(trunc) -#> measure estimate -#> 1: New confirmed cases by infection date 2477 (1311 -- 4653) -#> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.91 (0.65 -- 1.2) -#> 4: Rate of growth -0.02 (-0.085 -- 0.044) -#> 5: Doubling/halving time (days) -34 (16 -- -8.2) +#> measure estimate +#> +#> 1: New confirmed cases by infection date 2485 (1252 -- 4790) +#> 2: Expected change in daily cases Likely decreasing +#> 3: Effective reproduction no. 0.91 (0.65 -- 1.2) +#> 4: Rate of growth -0.019 (-0.087 -- 0.045) +#> 5: Doubling/halving time (days) -36 (15 -- -7.9) # elapsed time (in seconds) get_elapsed_time(trunc$fit) #> warmup sample -#> chain:1 32.951 25.092 -#> chain:2 33.920 25.699 -#> chain:3 31.455 46.192 -#> chain:4 27.765 26.048 +#> chain:1 11.657 9.733 +#> chain:2 16.154 9.816 +#> chain:3 16.801 13.905 +#> chain:4 12.173 10.649 # summary plot plot(trunc) ``` @@ -308,25 +371,26 @@ project_rt <- estimate_infections(reported_cases, prior = rt_prior, future = "project" ) ) -#> Warning: There were 4 divergent transitions after warmup. See +#> Warning: There were 19 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems # summarise results summary(project_rt) #> measure estimate -#> 1: New confirmed cases by infection date 2246 (1150 -- 4539) +#> +#> 1: New confirmed cases by infection date 2285 (1145 -- 4321) #> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.87 (0.62 -- 1.2) -#> 4: Rate of growth -0.028 (-0.096 -- 0.042) -#> 5: Doubling/halving time (days) -24 (16 -- -7.2) +#> 3: Effective reproduction no. 0.89 (0.61 -- 1.2) +#> 4: Rate of growth -0.025 (-0.099 -- 0.039) +#> 5: Doubling/halving time (days) -28 (18 -- -7) # elapsed time (in seconds) get_elapsed_time(project_rt$fit) #> warmup sample -#> chain:1 28.858 24.636 -#> chain:2 36.509 47.931 -#> chain:3 32.023 47.087 -#> chain:4 34.074 27.767 +#> chain:1 14.177 17.887 +#> chain:2 13.982 9.758 +#> chain:3 11.849 9.622 +#> chain:4 16.444 13.904 # summary plot plot(project_rt) ``` @@ -346,19 +410,20 @@ fixed <- estimate_infections(reported_cases, ) # summarise results summary(fixed) -#> measure estimate -#> 1: New confirmed cases by infection date 15737 (8930 -- 29454) -#> 2: Expected change in daily cases Increasing -#> 3: Effective reproduction no. 1.2 (1.1 -- 1.3) -#> 4: Rate of growth 0.038 (0.026 -- 0.05) -#> 5: Doubling/halving time (days) 18 (14 -- 27) +#> measure estimate +#> +#> 1: New confirmed cases by infection date 15887 (9181 -- 28140) +#> 2: Expected change in daily cases Increasing +#> 3: Effective reproduction no. 1.2 (1.1 -- 1.3) +#> 4: Rate of growth 0.038 (0.027 -- 0.049) +#> 5: Doubling/halving time (days) 18 (14 -- 26) # elapsed time (in seconds) get_elapsed_time(fixed$fit) #> warmup sample -#> chain:1 2.091 1.127 -#> chain:2 1.884 0.847 -#> chain:3 1.912 0.981 -#> chain:4 1.857 0.900 +#> chain:1 0.666 0.524 +#> chain:2 0.671 0.398 +#> chain:3 0.706 0.425 +#> chain:4 0.551 0.514 # summary plot plot(fixed) ``` @@ -391,19 +456,20 @@ bkp <- estimate_infections(bp_cases, ) # summarise results summary(bkp) -#> measure estimate -#> 1: New confirmed cases by infection date 2468 (2035 -- 2968) -#> 2: Expected change in daily cases Decreasing -#> 3: Effective reproduction no. 0.91 (0.88 -- 0.93) -#> 4: Rate of growth -0.021 (-0.026 -- -0.015) -#> 5: Doubling/halving time (days) -34 (-46 -- -27) +#> measure estimate +#> +#> 1: New confirmed cases by infection date 2537 (2117 -- 3067) +#> 2: Expected change in daily cases Decreasing +#> 3: Effective reproduction no. 0.91 (0.88 -- 0.93) +#> 4: Rate of growth -0.02 (-0.025 -- -0.014) +#> 5: Doubling/halving time (days) -35 (-50 -- -28) # elapsed time (in seconds) get_elapsed_time(bkp$fit) #> warmup sample -#> chain:1 3.115 2.705 -#> chain:2 2.910 2.433 -#> chain:3 3.695 2.271 -#> chain:4 3.683 2.654 +#> chain:1 1.301 1.325 +#> chain:2 1.147 1.201 +#> chain:3 1.710 1.384 +#> chain:4 0.961 1.147 # summary plot plot(bkp) ``` @@ -426,18 +492,19 @@ rw <- estimate_infections(reported_cases, # summarise results summary(rw) #> measure estimate -#> 1: New confirmed cases by infection date 2168 (1146 -- 4136) +#> +#> 1: New confirmed cases by infection date 2182 (1103 -- 4267) #> 2: Expected change in daily cases Likely decreasing -#> 3: Effective reproduction no. 0.86 (0.63 -- 1.2) -#> 4: Rate of growth -0.031 (-0.091 -- 0.031) -#> 5: Doubling/halving time (days) -22 (22 -- -7.6) +#> 3: Effective reproduction no. 0.87 (0.62 -- 1.2) +#> 4: Rate of growth -0.031 (-0.097 -- 0.032) +#> 5: Doubling/halving time (days) -22 (21 -- -7.1) # elapsed time (in seconds) get_elapsed_time(rw$fit) #> warmup sample -#> chain:1 7.324 9.414 -#> chain:2 9.205 9.808 -#> chain:3 8.057 10.079 -#> chain:4 6.632 7.324 +#> chain:1 2.500 3.987 +#> chain:2 2.341 4.075 +#> chain:3 2.303 3.118 +#> chain:4 2.235 3.873 # summary plot plot(rw) ``` @@ -461,18 +528,19 @@ no_delay <- estimate_infections( # summarise results summary(no_delay) #> measure estimate -#> 1: New confirmed cases by infection date 2790 (2313 -- 3329) +#> +#> 1: New confirmed cases by infection date 2776 (2336 -- 3311) #> 2: Expected change in daily cases Decreasing -#> 3: Effective reproduction no. 0.88 (0.76 -- 0.99) -#> 4: Rate of growth -0.026 (-0.055 -- -0.0022) -#> 5: Doubling/halving time (days) -26 (-320 -- -13) +#> 3: Effective reproduction no. 0.88 (0.77 -- 0.99) +#> 4: Rate of growth -0.028 (-0.055 -- -0.0034) +#> 5: Doubling/halving time (days) -25 (-200 -- -13) # elapsed time (in seconds) get_elapsed_time(no_delay$fit) #> warmup sample -#> chain:1 42.446 68.684 -#> chain:2 32.455 34.207 -#> chain:3 42.812 62.019 -#> chain:4 28.186 37.280 +#> chain:1 17.540 15.429 +#> chain:2 20.581 19.910 +#> chain:3 16.588 28.510 +#> chain:4 15.363 17.303 # summary plot plot(no_delay) ``` @@ -497,19 +565,20 @@ non_parametric <- estimate_infections(reported_cases, ) # summarise results summary(non_parametric) -#> measure estimate -#> 1: New confirmed cases by infection date 2361 (1808 -- 3074) -#> 2: Expected change in daily cases Decreasing -#> 3: Effective reproduction no. 0.9 (0.8 -- 0.99) -#> 4: Rate of growth -0.023 (-0.045 -- -0.0014) -#> 5: Doubling/halving time (days) -30 (-510 -- -15) +#> measure estimate +#> +#> 1: New confirmed cases by infection date 2306 (1696 -- 3051) +#> 2: Expected change in daily cases Decreasing +#> 3: Effective reproduction no. 0.89 (0.72 -- 0.99) +#> 4: Rate of growth -0.024 (-0.05 -- -0.0025) +#> 5: Doubling/halving time (days) -29 (-280 -- -14) # elapsed time (in seconds) get_elapsed_time(non_parametric$fit) #> warmup sample -#> chain:1 2.811 0.823 -#> chain:2 2.688 0.784 -#> chain:3 3.013 0.826 -#> chain:4 3.280 0.805 +#> chain:1 0.950 0.325 +#> chain:2 1.007 0.331 +#> chain:3 0.996 0.332 +#> chain:4 0.958 0.332 # summary plot plot(non_parametric) ``` diff --git a/vignettes/estimate_infections_options.Rmd.orig b/vignettes/estimate_infections_options.Rmd.orig index 0122d28bd..f1339b6ec 100644 --- a/vignettes/estimate_infections_options.Rmd.orig +++ b/vignettes/estimate_infections_options.Rmd.orig @@ -71,10 +71,7 @@ For the reporting delay, we use a lognormal distribution with mean of 2 days and Note that the mean and standard deviation must be converted to the log scale, which can be done using the `convert_log_logmean()` function. ```{r reporting_delay} -reporting_delay <- dist_spec( - mean = convert_to_logmean(2, 1), mean_sd = 0, - sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 -) +reporting_delay <- LogNormal(mean = 2, sd = 1, max = 10) reporting_delay ``` @@ -172,11 +169,12 @@ We might further want to adjust for right-truncation of recent data estimated us Here, instead of doing so we assume that we know about truncation with mean of 1/2 day, sd 1/2 day, following a lognormal distribution and with a maximum of three days. ```{r define_truncation} -trunc_dist <- dist_spec( - mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1, - sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1, +trunc_dist <- LogNormal( + mean = Normal(0.5, 0.1), + sd = Normal(0.5, 0.1), max = 3 ) +trunc_dist ``` We can then use this in the `esimtate_infections()` function using the `truncation` option. diff --git a/vignettes/estimate_infections_workflow.Rmd b/vignettes/estimate_infections_workflow.Rmd index d3765c4f9..875cded97 100644 --- a/vignettes/estimate_infections_workflow.Rmd +++ b/vignettes/estimate_infections_workflow.Rmd @@ -30,6 +30,7 @@ An example data set called `example_confirm` is included in the package: ```r head(example_confirmed) #> date confirm +#> #> 1: 2020-02-22 14 #> 2: 2020-02-23 62 #> 3: 2020-02-24 53 @@ -69,12 +70,12 @@ As these will affect any results, it is worth spending some time investigating w ## Delay distributions _EpiNow2_ works with different delays that apply to different parts of the infection and observation process. -They are defined using a common interface with the `dist_spec()` function. +They are defined using a common interface that involves functions that are named after the probability distributions, i.e. `LogNormal()`, `Gamma()`, etc. For help with this function, see its manual page ```r -?EpiNow2::dist_spec +?EpiNow2::Distributions ``` In all cases, the distributions given can be *fixed* (i.e. have no uncertainty) or *variable* (i.e. have associated uncertainty). @@ -82,9 +83,12 @@ For example, to define a fixed gamma distribution with mean 3, standard deviatio ```r -dist_spec(mean = 3, sd = 1, distribution = "gamma", max = 10) -#> -#> Fixed distribution with PMF [0.0038 0.15 0.39 0.3 0.12 0.03 0.006 0.00096 0.00013 1.6e-05 1.8e-06] +Gamma(mean = 3, sd = 1, max = 10) +#> - gamma distribution (max: 10): +#> shape: +#> 9 +#> rate: +#> 3 ``` If distributions are variable, the values with uncertainty are treated as [prior probability densities](https://en.wikipedia.org/wiki/Prior_probability) in the Bayesian inference framework used by _EpiNow2_, i.e. they are estimated as part of the inference. @@ -92,13 +96,30 @@ For example, to define a variable gamma distribution where uncertainty in the me ```r -dist_spec( - mean = 3, mean_sd = 2, sd = 1, sd_sd = 0.1, distribution = "gamma", max = 10 -) -#> -#> Uncertain gamma distribution with (untruncated) mean 3 (SD 2) and SD 1 (SD 0.1) +Gamma(mean = Normal(3, 2), sd = Normal(1, 0.1), max = 10) +#> Warning in new_dist_spec(params, "gamma"): Uncertain gamma distribution +#> specified in terms of parameters that are not the "natural" parameters of the +#> distribution (shape, rate). Converting using a crude and very approximate +#> method that is likely to produce biased results. If possible, it is preferable +#> to specify the distribution directly in terms of the natural parameters. +#> - gamma distribution (max: 10): +#> shape: +#> - normal distribution: +#> mean: +#> 9 +#> sd: +#> 2.5 +#> rate: +#> - normal distribution: +#> mean: +#> 3 +#> sd: +#> 1.4 ``` +Note the warning about parameters. +We used the mean and standard deviation to define this distribution with uncertain parameters, but it would be better to use the "natural" parameters of the gamma distribution, shape and rate, for example using the values estimate and reported back after calling the previous command. + There are various ways the specific delay distributions mentioned below might be obtained. Often, they will come directly from the existing literature reviewed by the user and studies conducted elsewhere. Sometimes it might be possible to obtain them from existing databases, e.g. using the [epiparameter](https://github.com/epiverse-trace/epiparameter) R package. @@ -109,13 +130,13 @@ For a more comprehensive treatment of delays and their estimation avoiding commo ### Generation intervals The generation interval is a delay distribution that describes the amount of time that passes between an individual becoming infected and infecting someone else. -In _EpiNow2_, the generation time distribution is defined by a call to `generation_time_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `dist_spec()`). -For example, to define the generation time as gamma distributed with uncertain mean centered on 3 (sd: 2) and sd centered on 1 (sd: 0.1), a maximum value of 10 and weighted by the number of case data points we would use +In _EpiNow2_, the generation time distribution is defined by a call to `generation_time_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by the function corresponding to the probability distribution, i.e. `LogNormal()`, `Gamma()`, etc.). +For example, to define the generation time as gamma distributed with uncertain mean centered on 3 and sd centered on 1 with some uncertainty, a maximum value of 10 and weighted by the number of case data points we could use the shape and rate parameters suggested above (though notes that this will only very approximately produce the uncertainty in mean and standard deviation stated there): ```r -generation_time <- dist_spec( - mean = 3, mean_sd = 2, sd = 1, sd_sd = 0.1, distribution = "gamma", max = 10 +generation_time <- Gamma( + shape = Normal(9, 2.5), rate = Normal(3, 1.4), max = 10 ) generation_time_opts(generation_time) ``` @@ -127,25 +148,39 @@ Usually this is not observed directly. Instead, we calculate case counts based on, for example, onset of symptoms, lab confirmations, hospitalisations, etc. In order to estimate the trajectory of infection incidence from this we need to either know or estimate the distribution of delays from infection to count. Often, such counts are composed of multiple delays for which we only have separate information, for example the incubation period (time from infection to symptom onset) and reporting delay (time from symptom onset to being a case in the data, e.g. via lab confirmation, if counts are not by the date of symptom onset). -In this case, we can combine multiple delays defined using `dist_spec()` with the plus (`+`) operator, e.g. +In this case, we can combine multiple delays with the plus (`+`) operator, e.g. ```r -incubation_period <- dist_spec( - mean = 1.6, mean_sd = 0.05, sd = 0.5, sd_sd = 0.05, - distribution = "lognormal", max = 14 -) -reporting_delay <- dist_spec( - mean = 0.5, sd = 0.5, distribution = "lognormal", max = 10 +incubation_period <- LogNormal( + meanlog = Normal(1.6, 0.05), + sdlog = Normal(0.5, 0.05), + max = 14 ) +reporting_delay <- LogNormal(meanlog = 0.5, sdlog = 0.5, max = 10) incubation_period + reporting_delay -#> -#> Combination of delay distributions: -#> Uncertain lognormal distribution with (untruncated) logmean 1.6 (SD 0.05) and logSD 0.5 (SD 0.05) -#> Fixed distribution with PMF [0.16 0.49 0.23 0.077 0.025 0.0084 0.003 0.0011 0.00045 0.00019 8.2e-05] +#> Composite distribution: +#> - lognormal distribution (max: 14): +#> meanlog: +#> - normal distribution: +#> mean: +#> 1.6 +#> sd: +#> 0.05 +#> sdlog: +#> - normal distribution: +#> mean: +#> 0.5 +#> sd: +#> 0.05 +#> - lognormal distribution (max: 10): +#> meanlog: +#> 0.5 +#> sdlog: +#> 0.5 ``` -In _EpiNow2_, the reporting delay distribution is defined by a call to `delay_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `dist_spec()`). +In _EpiNow2_, the reporting delay distribution is defined by a call to `delay_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `LogNormal()`, `Gamma()` etc.). For example, if our observations were by symptom onset we would use @@ -230,7 +265,7 @@ def <- estimate_infections( delays = delay_opts(delay), rt = rt_opts(prior = rt_prior) ) -#> Warning: There were 15 divergent transitions after warmup. See +#> Warning: There were 3 divergent transitions after warmup. See #> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup #> to find out why this is a problem and how to eliminate them. #> Warning: Examine the pairs() plot to diagnose sampling problems diff --git a/vignettes/estimate_infections_workflow.Rmd.orig b/vignettes/estimate_infections_workflow.Rmd.orig index 93b137a82..2effddff5 100644 --- a/vignettes/estimate_infections_workflow.Rmd.orig +++ b/vignettes/estimate_infections_workflow.Rmd.orig @@ -65,29 +65,30 @@ As these will affect any results, it is worth spending some time investigating w ## Delay distributions _EpiNow2_ works with different delays that apply to different parts of the infection and observation process. -They are defined using a common interface with the `dist_spec()` function. +They are defined using a common interface that involves functions that are named after the probability distributions, i.e. `LogNormal()`, `Gamma()`, etc. For help with this function, see its manual page ```{r eval = FALSE} -?EpiNow2::dist_spec +?EpiNow2::Distributions ``` In all cases, the distributions given can be *fixed* (i.e. have no uncertainty) or *variable* (i.e. have associated uncertainty). For example, to define a fixed gamma distribution with mean 3, standard deviation (sd) 1 and maximum value 10, you would write ```{r} -dist_spec(mean = 3, sd = 1, distribution = "gamma", max = 10) +Gamma(mean = 3, sd = 1, max = 10) ``` If distributions are variable, the values with uncertainty are treated as [prior probability densities](https://en.wikipedia.org/wiki/Prior_probability) in the Bayesian inference framework used by _EpiNow2_, i.e. they are estimated as part of the inference. For example, to define a variable gamma distribution where uncertainty in the mean is given by a normal distribution with mean 3 and sd 2, and uncertainty in the standard deviation is given by a normal distribution with mean 1 and sd 0.1, with a maximum value 10, you would write ```{r} -dist_spec( - mean = 3, mean_sd = 2, sd = 1, sd_sd = 0.1, distribution = "gamma", max = 10 -) +Gamma(mean = Normal(3, 2), sd = Normal(1, 0.1), max = 10) ``` +Note the warning about parameters. +We used the mean and standard deviation to define this distribution with uncertain parameters, but it would be better to use the "natural" parameters of the gamma distribution, shape and rate, for example using the values estimate and reported back after calling the previous command. + There are various ways the specific delay distributions mentioned below might be obtained. Often, they will come directly from the existing literature reviewed by the user and studies conducted elsewhere. Sometimes it might be possible to obtain them from existing databases, e.g. using the [epiparameter](https://github.com/epiverse-trace/epiparameter) R package. @@ -98,12 +99,12 @@ For a more comprehensive treatment of delays and their estimation avoiding commo ### Generation intervals The generation interval is a delay distribution that describes the amount of time that passes between an individual becoming infected and infecting someone else. -In _EpiNow2_, the generation time distribution is defined by a call to `generation_time_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `dist_spec()`). -For example, to define the generation time as gamma distributed with uncertain mean centered on 3 (sd: 2) and sd centered on 1 (sd: 0.1), a maximum value of 10 and weighted by the number of case data points we would use +In _EpiNow2_, the generation time distribution is defined by a call to `generation_time_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by the function corresponding to the probability distribution, i.e. `LogNormal()`, `Gamma()`, etc.). +For example, to define the generation time as gamma distributed with uncertain mean centered on 3 and sd centered on 1 with some uncertainty, a maximum value of 10 and weighted by the number of case data points we could use the shape and rate parameters suggested above (though notes that this will only very approximately produce the uncertainty in mean and standard deviation stated there): ```{r, results = 'hide'} -generation_time <- dist_spec( - mean = 3, mean_sd = 2, sd = 1, sd_sd = 0.1, distribution = "gamma", max = 10 +generation_time <- Gamma( + shape = Normal(9, 2.5), rate = Normal(3, 1.4), max = 10 ) generation_time_opts(generation_time) ``` @@ -115,20 +116,19 @@ Usually this is not observed directly. Instead, we calculate case counts based on, for example, onset of symptoms, lab confirmations, hospitalisations, etc. In order to estimate the trajectory of infection incidence from this we need to either know or estimate the distribution of delays from infection to count. Often, such counts are composed of multiple delays for which we only have separate information, for example the incubation period (time from infection to symptom onset) and reporting delay (time from symptom onset to being a case in the data, e.g. via lab confirmation, if counts are not by the date of symptom onset). -In this case, we can combine multiple delays defined using `dist_spec()` with the plus (`+`) operator, e.g. +In this case, we can combine multiple delays with the plus (`+`) operator, e.g. ```{r} -incubation_period <- dist_spec( - mean = 1.6, mean_sd = 0.05, sd = 0.5, sd_sd = 0.05, - distribution = "lognormal", max = 14 -) -reporting_delay <- dist_spec( - mean = 0.5, sd = 0.5, distribution = "lognormal", max = 10 +incubation_period <- LogNormal( + meanlog = Normal(1.6, 0.05), + sdlog = Normal(0.5, 0.05), + max = 14 ) +reporting_delay <- LogNormal(meanlog = 0.5, sdlog = 0.5, max = 10) incubation_period + reporting_delay ``` -In _EpiNow2_, the reporting delay distribution is defined by a call to `delay_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `dist_spec()`). +In _EpiNow2_, the reporting delay distribution is defined by a call to `delay_opts()`, a function that takes a single argument defined as a `dist_spec` object (returned by `LogNormal()`, `Gamma()` etc.). For example, if our observations were by symptom onset we would use ```{r, results = 'hide'} diff --git a/vignettes/figure/bp-1.png b/vignettes/figure/bp-1.png index d2c46289c..f1beee16e 100644 Binary files a/vignettes/figure/bp-1.png and b/vignettes/figure/bp-1.png differ diff --git a/vignettes/figure/data-1.png b/vignettes/figure/data-1.png index fba23c563..1bd776210 100644 Binary files a/vignettes/figure/data-1.png and b/vignettes/figure/data-1.png differ diff --git a/vignettes/figure/default-1.png b/vignettes/figure/default-1.png index ace14e659..1a69bf2ea 100644 Binary files a/vignettes/figure/default-1.png and b/vignettes/figure/default-1.png differ diff --git a/vignettes/figure/fixed-1.png b/vignettes/figure/fixed-1.png index cb064bf36..f70658473 100644 Binary files a/vignettes/figure/fixed-1.png and b/vignettes/figure/fixed-1.png differ diff --git a/vignettes/figure/gp_projection-1.png b/vignettes/figure/gp_projection-1.png index 2c3732c36..ad4ca77a3 100644 Binary files a/vignettes/figure/gp_projection-1.png and b/vignettes/figure/gp_projection-1.png differ diff --git a/vignettes/figure/lower_accuracy-1.png b/vignettes/figure/lower_accuracy-1.png index 3e9a25504..3a4f33b9e 100644 Binary files a/vignettes/figure/lower_accuracy-1.png and b/vignettes/figure/lower_accuracy-1.png differ diff --git a/vignettes/figure/no_delays-1.png b/vignettes/figure/no_delays-1.png index db3656ffd..dfdb3b740 100644 Binary files a/vignettes/figure/no_delays-1.png and b/vignettes/figure/no_delays-1.png differ diff --git a/vignettes/figure/nonparametric-1.png b/vignettes/figure/nonparametric-1.png index 9084d2676..a208ff299 100644 Binary files a/vignettes/figure/nonparametric-1.png and b/vignettes/figure/nonparametric-1.png differ diff --git a/vignettes/figure/regional_epinow-1.png b/vignettes/figure/regional_epinow-1.png index e705f3482..537f3ac58 100644 Binary files a/vignettes/figure/regional_epinow-1.png and b/vignettes/figure/regional_epinow-1.png differ diff --git a/vignettes/figure/regional_epinow_multiple-1.png b/vignettes/figure/regional_epinow_multiple-1.png index be4beabb1..5ef0abb73 100644 Binary files a/vignettes/figure/regional_epinow_multiple-1.png and b/vignettes/figure/regional_epinow_multiple-1.png differ diff --git a/vignettes/figure/results-1.png b/vignettes/figure/results-1.png index 1fe2b3800..b227996c6 100644 Binary files a/vignettes/figure/results-1.png and b/vignettes/figure/results-1.png differ diff --git a/vignettes/figure/susceptible_depletion-1.png b/vignettes/figure/susceptible_depletion-1.png index b1abf8d99..e5c8cb59f 100644 Binary files a/vignettes/figure/susceptible_depletion-1.png and b/vignettes/figure/susceptible_depletion-1.png differ diff --git a/vignettes/figure/truncation-1.png b/vignettes/figure/truncation-1.png index 689c418ee..a29cd9912 100644 Binary files a/vignettes/figure/truncation-1.png and b/vignettes/figure/truncation-1.png differ diff --git a/vignettes/figure/unnamed-chunk-10-1.png b/vignettes/figure/unnamed-chunk-10-1.png index 34402a043..a9081a131 100644 Binary files a/vignettes/figure/unnamed-chunk-10-1.png and b/vignettes/figure/unnamed-chunk-10-1.png differ diff --git a/vignettes/figure/unnamed-chunk-14-1.png b/vignettes/figure/unnamed-chunk-14-1.png index 3c218b665..e3448df56 100644 Binary files a/vignettes/figure/unnamed-chunk-14-1.png and b/vignettes/figure/unnamed-chunk-14-1.png differ diff --git a/vignettes/figure/weekly_rw-1.png b/vignettes/figure/weekly_rw-1.png index 672ed53bb..149448fac 100644 Binary files a/vignettes/figure/weekly_rw-1.png and b/vignettes/figure/weekly_rw-1.png differ