diff --git a/NEWS.md b/NEWS.md index f08468e59..660c682e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,7 @@ * Updated the parameterisation of the dispersion term `phi` to be `phi = 1 / sqrt_phi ^ 2` rather than the previous parameterisation `phi = 1 / sqrt(sqrt_phi)` based on the suggested prior [here](https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations#story-when-the-generic-prior-fails-the-case-of-the-negative-binomial) and the performance benefits seen in the `epinowcast` package (see [here](https://github.com/epinowcast/epinowcast/blob/8eff560d1fd8305f5fb26c21324b2bfca1f002b4/inst/stan/epinowcast.stan#L314)). By @seabbs in #487 and reviewed by @sbfnk. * Added an `na` argument to `obs_opts()` that allows the user to specify whether NA values in the data should be interpreted as missing or accumulated in the next non-NA data point. By @sbfnk in #534 and reviewed by @seabbs. * Growth rates are now calculated directly from the infection trajectory as `log I(t) - log I(t - 1)`. Originally by @seabbs in #213, finished by @sbfnk in #610 and reviewed by @seabbs. +* Fixed a bug when using the nonmechanistic model that could lead to explosive growth. By @sbfnk in #612 and reviewed by @jamesmbaazam. # EpiNow2 1.4.0 diff --git a/R/create.R b/R/create.R index bf54f3612..cdc0b855b 100644 --- a/R/create.R +++ b/R/create.R @@ -101,6 +101,17 @@ create_complete_cases <- function(cases) { #' [estimate_infections()] to generate the mean shifted prior on which the back #' calculation method (see [backcalc_opts()]) is based. #' +#' @details +#' The function first shifts all the data back in time by `shift` days (thus +#' discarding the first `shift` days of data) and then applies a centred +#' rolling mean of length `smoothing_window` to the shifted data except for +#' the final period. The final period (the forecast horizon plus half the +#' smoothing window) is instead replaced by a log-linear model fit (with 1 +#' added to the data for fitting to avoid zeroes and later subtracted again), +#' projected to the end of the forecast horizon. The initial part of the data +#' (corresponding to the length of the smoothing window) is then removed, and +#' any non-integer resulting values rounded up. +#' #' @param smoothing_window Numeric, the rolling average smoothing window #' to apply. Must be odd in order to be defined as a centred average. #' @@ -114,7 +125,24 @@ create_complete_cases <- function(cases) { #' @return A `` for shifted reported cases #' @export #' @examples -#' create_shifted_cases(example_confirmed, 7, 14, 7) +#' shift <- 7 +#' horizon <- 7 +#' smoothing_window <- 14 +#' ## add NAs for horizon +#' cases <- create_clean_reported_cases(example_confirmed, horizon = horizon) +#' ## add zeroes initially +#' cases <- data.table::rbindlist(list( +#' data.table::data.table( +#' date = seq( +#' min(cases$date) - smoothing_window, +#' min(cases$date) - 1, +#' by = "days" +#' ), +#' confirm = 0, breakpoint = 0 +#' ), +#' cases +#' )) +#' create_shifted_cases(cases, shift, smoothing_window, horizon) create_shifted_cases <- function(reported_cases, shift, smoothing_window, horizon) { shifted_reported_cases <- data.table::copy(reported_cases)[ @@ -128,18 +156,18 @@ create_shifted_cases <- function(reported_cases, shift, confirm := runner::mean_run( confirm, k = smoothing_window, lag = -floor(smoothing_window / 2) ) - ][ - , - confirm := data.table::fifelse(confirm == 0, 1, confirm) # nolint ] ## Forecast trend on reported cases using the last week of data - final_week <- data.table::data.table( - confirm = shifted_reported_cases[1:(.N - horizon - shift)][ - max(1, .N - 6):.N]$confirm)[, + final_period <- data.table::data.table( + confirm = + shifted_reported_cases[!is.na(confirm)][ + max(1, .N - smoothing_window):.N + ]$confirm + )[, t := seq_len(.N) ] - lm_model <- stats::lm(log(confirm) ~ t, data = final_week) + lm_model <- stats::lm(log(confirm + 1) ~ t, data = final_period) ## Estimate unreported future infections using a log linear model shifted_reported_cases <- shifted_reported_cases[ , @@ -151,7 +179,7 @@ create_shifted_cases <- function(reported_cases, shift, , confirm := data.table::fifelse( t >= 7, - exp(lm_model$coefficients[1] + lm_model$coefficients[2] * t), + exp(lm_model$coefficients[1] + lm_model$coefficients[2] * t) - 1, confirm ) ][, t := NULL] diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 75d6845cd..e326c87b7 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -164,7 +164,7 @@ estimate_infections <- function(reported_cases, # Record earliest date with data start_date <- min(reported_cases$date, na.rm = TRUE) - seeding_time <- get_seeding_time(delays, generation_time) + seeding_time <- get_seeding_time(delays, generation_time, rt) # Create mean shifted reported cases as prior reported_cases <- data.table::rbindlist(list( diff --git a/R/get.R b/R/get.R index 6f272d717..bb37175cc 100644 --- a/R/get.R +++ b/R/get.R @@ -319,20 +319,14 @@ get_regions_with_most_reports <- function(reported_cases, ##' ##' The seeding time is set to the mean of the specified delays, constrained ##' to be at least the maximum generation time -##' @param delays Delays specified using distribution functions such as -##' [Gamma()] or [LogNormal()] -##' @param generation_time Generation specified using distribution functions -##' such as [Gamma()] or [LogNormal()] +##' @inheritParams estimate_infections ##' @return An integer seeding time -get_seeding_time <- function(delays, generation_time) { +get_seeding_time <- function(delays, generation_time, rt = rt_opts()) { # Estimate the mean delay ----------------------------------------------- seeding_time <- sum(mean(delays, ignore_uncertainty = TRUE)) - if (seeding_time < 1) { - seeding_time <- 1 - } else { - seeding_time <- round(seeding_time) + if (!is.null(rt)) { + ## make sure we have at least (length of total gt pmf - 1) seeding time + seeding_time <- max(seeding_time, sum(max(generation_time))) } - ## make sure we have at least (length of total gt pmf - 1) seeding time - seeding_time <- max(seeding_time, sum(max(generation_time))) - return(seeding_time) + return(max(round(seeding_time), 1)) } diff --git a/man/create_shifted_cases.Rd b/man/create_shifted_cases.Rd index 0e9463316..011e09751 100644 --- a/man/create_shifted_cases.Rd +++ b/man/create_shifted_cases.Rd @@ -30,6 +30,34 @@ using a centred partial rolling average (with a period set by \code{\link[=estimate_infections]{estimate_infections()}} to generate the mean shifted prior on which the back calculation method (see \code{\link[=backcalc_opts]{backcalc_opts()}}) is based. } +\details{ +The function first shifts all the data back in time by \code{shift} days (thus +discarding the first \code{shift} days of data) and then applies a centred +rolling mean of length \code{smoothing_window} to the shifted data except for +the final period. The final period (the forecast horizon plus half the +smoothing window) is instead replaced by a log-linear model fit (with 1 +added to the data for fitting to avoid zeroes and later subtracted again), +projected to the end of the forecast horizon. The initial part of the data +(corresponding to the length of the smoothing window) is then removed, and +any non-integer resulting values rounded up. +} \examples{ -create_shifted_cases(example_confirmed, 7, 14, 7) +shift <- 7 +horizon <- 7 +smoothing_window <- 14 +## add NAs for horizon +cases <- create_clean_reported_cases(example_confirmed, horizon = horizon) +## add zeroes initially +cases <- data.table::rbindlist(list( + data.table::data.table( + date = seq( + min(cases$date) - smoothing_window, + min(cases$date) - 1, + by = "days" + ), + confirm = 0, breakpoint = 0 + ), + cases + )) +create_shifted_cases(cases, shift, smoothing_window, horizon) } diff --git a/man/get_seeding_time.Rd b/man/get_seeding_time.Rd index 862ebfc80..98df076c1 100644 --- a/man/get_seeding_time.Rd +++ b/man/get_seeding_time.Rd @@ -4,14 +4,20 @@ \alias{get_seeding_time} \title{Estimate seeding time from delays and generation time} \usage{ -get_seeding_time(delays, generation_time) +get_seeding_time(delays, generation_time, rt = rt_opts()) } \arguments{ -\item{delays}{Delays specified using distribution functions such as -\code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}}} +\item{delays}{A call to \code{\link[=delay_opts]{delay_opts()}} defining delay distributions and +options. See the documentation of \code{\link[=delay_opts]{delay_opts()}} and the examples below for +details.} -\item{generation_time}{Generation specified using distribution functions -such as \code{\link[=Gamma]{Gamma()}} or \code{\link[=LogNormal]{LogNormal()}}} +\item{generation_time}{A call to \code{\link[=generation_time_opts]{generation_time_opts()}} defining the +generation time distribution used. For backwards compatibility a list of +summary parameters can also be passed.} + +\item{rt}{A list of options as generated by \code{\link[=rt_opts]{rt_opts()}} defining Rt +estimation. Defaults to \code{\link[=rt_opts]{rt_opts()}}. Set to \code{NULL} to switch to using back +calculation rather than generating infections using Rt.} } \value{ An integer seeding time