diff --git a/R/iidda/NAMESPACE b/R/iidda/NAMESPACE index 62af8b7..8e8cc59 100644 --- a/R/iidda/NAMESPACE +++ b/R/iidda/NAMESPACE @@ -32,14 +32,11 @@ export(extract_all_between_paren) export(extract_between_paren) export(extract_char_or_blank) export(extract_or_blank) -export(factor_time_scale) export(failed_prep_script_outcomes) export(fill_and_wrap) export(fill_re_template) export(filter_dependencies) -export(find_unaccounted_cases) export(fix_csv) -export(flatten_disease_hierarchy) export(freq_to_by) export(freq_to_days) export(get_all_dependencies) @@ -47,7 +44,6 @@ export(get_canmod_digitization_metadata) export(get_dataset_path) export(get_elements) export(get_firsts) -export(get_implied_zeros) export(get_items) export(get_lookup_table) export(get_main_script) @@ -62,7 +58,6 @@ export(identify_scales) export(iidda_data_dictionary) export(iidda_from_single_file) export(is_empty) -export(is_leaf_disease) export(iso_3166_codes) export(iso_8601_dateranges) export(iso_8601_dates) @@ -90,7 +85,6 @@ export(melt_tracking_table_keys) export(memoise_remove_age) export(nlist) export(normalize_diseases) -export(normalize_time_scales) export(open_locally) export(or_pattern) export(pipeline_exploration_starter) diff --git a/R/iidda/R/data_prep_tools.R b/R/iidda/R/data_prep_tools.R index 2657b11..955fc31 100644 --- a/R/iidda/R/data_prep_tools.R +++ b/R/iidda/R/data_prep_tools.R @@ -701,108 +701,6 @@ add_basal_disease = function(data, lookup){ with_basal } -#' Is Leaf Disease -#' -#' Given a set of `disease`-`nesting_disease` pairs that all share the same -#' \code{\link{basal_disease}}, -#' -#' @param disease Disease name vector. -#' @param nesting_disease Vector of the same length as \code{disease} giving -#' the nesting diseases of element in \code{disease}. -#' -#' @return True if disease is never a nesting disease (it is a leaf disease), -#' False if disease is a nesting disease. -#' -#' @export -is_leaf_disease = function(disease, nesting_disease) !disease %in% unique(nesting_disease) - -#' Flatten Disease Hierarchy -#' -#' Take a tidy data set with a potentially complex disease hierarchy -#' and flatten this hierarchy so that, at any particular time and location -#' (or some other context), all diseases in the `disease` column have the -#' same `nesting_disease`. -#' -#' @param data A tidy data set with the following minimal set of columns: -#' `disease`, `nesting_disease`, `period_start_date`, `period_end_date`, -#' and `location`. Note that the latter three can be modified with -#' `grouping_columns`. -#' @param disease_lookup A lookup table with `disease` and `nesting_disease` -#' columns that describe a global disease hierarchy that will be applied -#' locally to flatten disease hierarchy at each point in time and space -#' in the tidy data set in the `data` argument. -#' @param grouping_columns Character vector of column names to use when -#' grouping to determine the context. -#' @param basal_diseases_to_prune Character vector of `disease`s to -#' remove from `data`. -#' @param specials_pattern Optional regular expression to use to match -#' `disease` names in `data` that should be added to the lookup table. This -#' is useful for disease names that are not historical and produced for -#' harmonization purposes. The most common example is `"_unaccounted$"`, -#' which is the default. Setting this argument to `NULL` avoids adding -#' any special disease names to the lookup table. -#' -#' @export -flatten_disease_hierarchy = function(data - , disease_lookup - , grouping_columns = c("period_start_date", "period_end_date", "location") - , basal_diseases_to_prune = character() - , specials_pattern = "_unaccounted$" -) { - - # only need the lookup table to infer the hierarchy - disease_lookup = (disease_lookup - |> select(disease, nesting_disease) - |> distinct() - ) - - if (!is.null(specials_pattern)) { - specials = (data - |> filter(grepl(specials_pattern, disease)) - |> select(disease, nesting_disease) - |> distinct() - ) - disease_lookup = bind_rows(disease_lookup, specials) - } - pruned_lookup = (disease_lookup - |> filter(!disease %in% basal_diseases_to_prune) - |> mutate(nesting_disease = ifelse( - nesting_disease %in% basal_diseases_to_prune - , '' - , nesting_disease - ) - ) - ) - (data - - # getting basal disease for all diseases - |> rowwise() - |> mutate(basal_disease = basal_disease(disease, disease_lookup)) - |> ungroup() - - # prune basal_diseases - |> mutate(x = disease %in% basal_diseases_to_prune) - |> mutate(y = nesting_disease %in% basal_diseases_to_prune) - |> mutate(z = basal_disease %in% basal_diseases_to_prune) - - |> filter(!x) - |> mutate(nesting_disease = ifelse(y, "", nesting_disease)) - |> rowwise() - |> mutate(basal_disease = ifelse(z, basal_disease(disease, pruned_lookup), basal_disease)) - |> ungroup() - - # keeping only leaf diseases - |> group_by(across(c("basal_disease", all_of(grouping_columns)))) # period_start_date, period_end_date, location, basal_disease) - |> filter(is_leaf_disease(disease, nesting_disease)) - |> ungroup() - - # if there is only the basal disease (no sub-diseases), differentiate by adding '-only' - # mutate(disease = ifelse(disease == basal_disease, sprintf("%s-only", disease), disease)) - # mutate(nesting_disease = basal_disease) - |> select(-x, -y, -z) - - ) -} ## TODO: user-facing function to flatten the disease hierarchy. should probably ## be in iidda.analysis because it will make use of the api to get a @@ -813,339 +711,3 @@ aggregate_disease_hierarchy = function(data, ...) { # group by nesting_disease etc ... ) } - -time_scale_chooser = function(time_scale, which_fun) { - time_scale_order = c("wk", "2wk", "mo", "qr", "yr") - time_scale = as.character(time_scale) - bad_scale = !time_scale %in% time_scale_order - if (any(bad_scale)) { - these_bad_scales = paste0(time_scale[bad_scale], collapse = ", ") - stop( - "\nThese scales where found in the data but are not on the valid list:\n" - , these_bad_scales, - , "\nValid scales include these:\n" - , paste0(time_scale_order, collapse = ", ") - ) - } - time_scale_factor = factor(time_scale, levels = time_scale_order) - r = time_scale[which_fun(as.numeric(time_scale_factor))] - if (length(r) != 1L) stop("Unable to choose a single time scale.") - r -} - -#' Factor Time Scale -#' -#' @param data A tidy data set with a `time_scale` column. -#' -#' @return A data set with a factored time_scale column. -#' -#' @export -factor_time_scale = function(data){ - if (is.factor(data$time_scale)) { - return(data) - } - time_scale_map = c(wk = "wk", yr = "yr", mo = "mo", `2wk` = "2wk", mt = "mo", `two-wks` = "2wk", qrtr = "qr", qr = "qr") - data$time_scale = time_scale_map[as.character(data$time_scale)] - order = c("wk", "2wk", "mo", "qr", "yr") - - return(mutate(data, time_scale = factor(data$time_scale, levels = order, ordered = TRUE))) -} - -#' Filter out Time Scales OLD -#' -#' Choose a single best `time_scale` for each year in a dataset, grouped by -#' nesting disease. This best `time_scale` is defined as the longest -#' of the shortest time scales in each location and sub-disease. -#' -#' @param data A tidy data set with a `time_scale` column. -#' @param initial_group Character vector naming columns for defining -#' the initial grouping used to compute the shortest time scales. -#' @param final_group Character vector naming columns for defining the final -#' grouping used to compute the longest of the shortest time scales. -#' @param cleanup Should intermediate columns be removed before returning the -#' output -#' -#' @return A data set only containing records with the best time scale. -#' -#' @importFrom lubridate year -#' @noRd -filter_out_time_scales_old = function(data - , initial_group = c("iso_3166", "iso_3166_2", "disease", "nesting_disease") - , final_group = c("basal_disease") - , cleanup = TRUE - ) { - time_scale_map = c( - wk = "wk", yr = "yr", mo = "mo", `2wk` = "2wk", mt = "mo" - , `two-wks` = "2wk", qrtr = "qr", qr = "qr" - ) - data$time_scale = time_scale_map[as.character(data$time_scale)] - if (length(unique(data$time_scale)) == 1L) return(data) - new_data = (data - |> mutate(year = year(period_end_date)) - |> group_by(across(all_of(c("year", initial_group)))) - |> mutate(shortest_time_scale = time_scale_chooser(time_scale, which.min)) - |> ungroup() - |> group_by(across(all_of(c("year", final_group)))) - |> mutate(best_time_scale = time_scale_chooser(shortest_time_scale, which.max)) - |> ungroup() - |> filter(as.character(time_scale) == best_time_scale) - ) - if (isTRUE(cleanup)) { - new_data = select(new_data - , -year, -shortest_time_scale, -best_time_scale - ) - } - new_data -} - - -#' Normalize Time Scales -#' -#' Choose a single best `time_scale` for each year in a dataset, grouped by -#' nesting disease. This best `time_scale` is defined as the longest -#' of the shortest time scales in each location and sub-disease. -#' -#' @param data A tidy data set with a `time_scale` and `year` column -#' @param initial_group Character vector naming columns for defining -#' the initial grouping used to compute the shortest time scales. -#' @param final_group Character vector naming columns for defining the final -#' grouping used to compute the longest of the shortest time scales. -#' @param get_implied_zeros Add zeros that are implied by a '0' reported at a coarser timescale. -#' @param aggregate_if_unavailable If a location is not reporting for the determined -#' 'best timescale', but is reporting at a finer timescale, aggregate this finer -#' timescale to the 'best timescale' -#' -#' @return A data set only containing records with the optimal time scale. -#' -#' @importFrom lubridate year -#' @export -normalize_time_scales = function(data - , initial_group = c("year", "iso_3166", "iso_3166_2", "disease", "nesting_disease", "basal_disease") - , final_group = c("basal_disease") - , get_implied_zeros = TRUE - , aggregate_if_unavailable = TRUE -) { - - if(get_implied_zeros) data = get_implied_zeros(data) - - if (length(unique(data$time_scale)) == 1L) return(data) - - if (!"year" %in% colnames(data)) {stop("The column 'year' does not exist in the dataset.")} - - new_data = (data - # remove '_unaccounted' cases when deciding best time_scale - |> factor_time_scale() - |> filter(!grepl("_unaccounted$", disease)) - # |> mutate(year = year(period_end_date)) - |> group_by(across(all_of(c("year", initial_group)))) - |> mutate(shortest_time_scale = time_scale_chooser(time_scale, which.min)) - |> ungroup() - |> group_by(across(all_of(c("year", final_group)))) - |> mutate(best_time_scale = time_scale_chooser(shortest_time_scale, which.max)) - |> ungroup() - |> filter(as.character(time_scale) == best_time_scale) - |> select(-best_time_scale, -shortest_time_scale) - ) - - # adding "unaccounted" data back, at the best_time_scale - all_new_data = (data - |> filter(grepl("_unaccounted$", disease)) - |> mutate(year = year(period_end_date)) - |> semi_join(select(new_data, "year", "time_scale", "disease", "nesting_disease", "basal_disease") |> unique(), - by = c("year", "time_scale", final_group)) - |> rbind(new_data) - ) - - if(aggregate_if_unavailable) { - - # coarse scales to aggregate to - scales = (all_new_data - |> select(period_start_date, period_end_date, disease, nesting_disease, basal_disease) - |> unique() - |> rename(coarser_start_date = period_start_date, - coarser_end_date = period_end_date) - ) - - # data which isn't available at 'best_time_scale' for the year, but is - # available at a finer timescale - data_to_aggregate = (data - |> factor_time_scale() - # |> mutate(year = year(period_end_date)) - |> left_join(select(all_new_data, "year","disease", "nesting_disease", "basal_disease", "time_scale") |> unique(), - by = c("year", "disease", "nesting_disease", "basal_disease"), - suffix = c('_old', '_new')) - |> filter(time_scale_old < time_scale_new) - |> mutate(period_start_date = as.Date(period_start_date), - period_end_date = as.Date(period_end_date)) - - # keep only data which isn't available at the 'best time scale' (which is now the timescale in all_new_data) - |> anti_join(select(all_new_data,"iso_3166_2", "year","disease", "nesting_disease", "basal_disease", "time_scale") |> unique() - , by = c('time_scale_new' = 'time_scale', 'disease', 'year', 'nesting_disease','basal_disease', 'iso_3166_2')) - ) - - aggregated_unavailable_data = (scales - |> inner_join(data_to_aggregate, by = c("disease", "nesting_disease", "basal_disease"), relationship = 'many-to-many') - |> filter(period_end_date > coarser_start_date & period_end_date <= coarser_end_date) - |> select(names(data_to_aggregate), coarser_start_date, coarser_end_date) - - |> group_by(iso_3166, iso_3166_2, disease, nesting_disease, basal_disease, coarser_start_date, coarser_end_date) - |> mutate(cases_coarse_period = sum(as.numeric(cases_this_period))) - |> mutate(population = round(mean(as.numeric(population))), - population_reporting = round(mean(as.numeric(population_reporting)))) - |> ungroup() - - |> select(-cases_this_period, -period_start_date, -period_end_date, - -days_this_period, -period_mid_date) - |> rename(time_scale = time_scale_new, - cases_this_period = cases_coarse_period, - period_start_date = coarser_start_date, - period_end_date = coarser_end_date) - - |> distinct(iso_3166, iso_3166_2, disease, nesting_disease, basal_disease, - period_start_date, period_end_date, .keep_all = TRUE) - - # add back days_this_period and period_mid_date for the coarser start and end dates - # FIXME: apparently using iidda analysis functions will cause issues. oops - |> mutate(days_this_period = iidda.analysis::num_days(period_start_date, period_end_date)) - |> mutate(period_mid_date = iidda.analysis::mid_dates(period_start_date, period_end_date, days_this_period)) - |> select(-time_scale_old) - |> mutate(record_origin = 'derived-aggregated-timescales') - ) - - final = (all_new_data - |> mutate(record_origin = ifelse("record_origin" %in% names(all_new_data), record_origin, 'historical')) - |> rbind(aggregated_unavailable_data) - ) - - return(final) - } else{ - return(all_new_data) - } -} - -#' Get Implied Zeros -#' -#' Add zeros to data set that are implied by a '0' reported at a coarser timescale. -#' -#' @param data A tidy data set -#' -#' @return A tidy data set with inferred 0s -#' -#' @export -get_implied_zeros = function(data){ - - starting_data = (data - |> mutate(year = year(as.Date(period_end_date))) - |> factor_time_scale() - - |> group_by(iso_3166_2, disease, year, original_dataset_id) - |> mutate(all_zero = ifelse(sum(as.numeric(cases_this_period)) == 0, TRUE, FALSE)) - |> ungroup() - - |> group_by(disease, year, original_dataset_id) - |> mutate(finest_timescale = min(time_scale)) - |> ungroup() - ) - - scales = (starting_data - |> filter(time_scale == finest_timescale) - |> distinct(disease, nesting_disease, basal_disease, - year, time_scale, period_start_date, period_end_date, - period_mid_date, days_this_period, original_dataset_id) -) - - # records for which all_zero = true and finest_timescale isn't available - get_new_zeros = (starting_data - |> filter(time_scale > finest_timescale, all_zero) - - # filter for timescales that are not in the original data - |> anti_join(starting_data - , by = c('iso_3166_2', 'year', 'finest_timescale' = 'time_scale', - 'disease', 'nesting_disease', 'basal_disease', 'dataset_id' - )) # nesting/basal too? see if that changes result! - - |> select(-period_start_date, -period_end_date, -period_mid_date, - -days_this_period) - ) - - # for rows in get_new_records, find the periods (i.e. start and end dates) - # for the finest_timescale for a given year, disease, and original_dataset_id - new_zeros = (get_new_zeros - |> left_join(scales, by = c('disease', 'year', 'finest_timescale' = 'time_scale', - 'nesting_disease', 'basal_disease', 'original_dataset_id'), - relationship = "many-to-many") - |> select(-time_scale, -year, -all_zero) - |> rename(time_scale = finest_timescale) - - |> mutate(record_origin = 'derived-implied-zero') - ) - - # join back to original data - (data - |> mutate(record_origin = ifelse("record_origin" %in% names(data), record_origin, 'historical')) - |> rbind(new_zeros) - ) -} - - -#' Find Unaccounted Cases -#' -#' Make new records for instances when the sum of leaf diseases is less than -#' the reported total for their basal disease. The difference between these -#' counts gets disease name 'basal_disease'_unaccounted'. -#' -#' -#' @param data A tidy data set with a `basal_disease` column. -#' -#' @return A data set containing records that are the difference between a -#' reported total for a basal_disease and the sum of their leaf diseases -#' -#' @export -find_unaccounted_cases = function(data){ - - # check if sum of leaf diseases = reported sum of basal disease - sum_of_leaf = ( - data - %>% filter(!disease %in% unique(nesting_disease)) - %>% filter(disease != basal_disease) - %>% group_by(iso_3166, iso_3166_2, period_start_date, period_end_date, nesting_disease) - %>% summarise(cases_this_period = sum(as.numeric(cases_this_period))) - ) - - reported_totals = ( - data - %>% filter(nesting_disease == '') - %>% filter(disease %in% sum_of_leaf$nesting_disease) - %>% select(-nesting_disease) - %>% rename(nesting_disease = disease) - ) - - # if sum of leaf diseases is < reported sum of basal disease, - # make new sub-disease called 'disease-name'_unaccounted, which contains - # the difference between sum of leaf diseases and the reported sum of the disease - unaccounted_data = - (inner_join(sum_of_leaf, reported_totals, by = - c('iso_3166', 'iso_3166_2', 'period_start_date', 'period_end_date', 'nesting_disease'), - suffix = c('_sum', '_reported')) - - %>% mutate(cases_this_period_reported = as.numeric(cases_this_period_reported), - cases_this_period_sum = as.numeric(cases_this_period_sum)) - - %>% filter(cases_this_period_sum < cases_this_period_reported) - %>% mutate(diff = cases_this_period_reported - cases_this_period_sum) - %>% rename(cases_this_period = diff) - %>% mutate(disease = paste(nesting_disease, 'unaccounted', sep = '_')) - - %>% select(-cases_this_period_reported, -cases_this_period_sum) - - %>% mutate(original_dataset_id = '') - %>% mutate(historical_disease = '') - %>% mutate(record_origin = 'derived-unaccounted-cases') - ) - - (data - %>% mutate(record_origin = ifelse("record_origin" %in% names(data), record_origin, 'historical')) - %>% rbind(unaccounted_data) - ) -} - diff --git a/R/iidda/man/factor_time_scale.Rd b/R/iidda/man/factor_time_scale.Rd deleted file mode 100644 index 22e7784..0000000 --- a/R/iidda/man/factor_time_scale.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{factor_time_scale} -\alias{factor_time_scale} -\title{Factor Time Scale} -\usage{ -factor_time_scale(data) -} -\arguments{ -\item{data}{A tidy data set with a `time_scale` column.} -} -\value{ -A data set with a factored time_scale column. -} -\description{ -Factor Time Scale -} diff --git a/R/iidda/man/find_unaccounted_cases.Rd b/R/iidda/man/find_unaccounted_cases.Rd deleted file mode 100644 index 815b9fa..0000000 --- a/R/iidda/man/find_unaccounted_cases.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{find_unaccounted_cases} -\alias{find_unaccounted_cases} -\title{Find Unaccounted Cases} -\usage{ -find_unaccounted_cases(data) -} -\arguments{ -\item{data}{A tidy data set with a `basal_disease` column.} -} -\value{ -A data set containing records that are the difference between a -reported total for a basal_disease and the sum of their leaf diseases -} -\description{ -Make new records for instances when the sum of leaf diseases is less than -the reported total for their basal disease. The difference between these -counts gets disease name 'basal_disease'_unaccounted'. -} diff --git a/R/iidda/man/flatten_disease_hierarchy.Rd b/R/iidda/man/flatten_disease_hierarchy.Rd deleted file mode 100644 index 832a967..0000000 --- a/R/iidda/man/flatten_disease_hierarchy.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{flatten_disease_hierarchy} -\alias{flatten_disease_hierarchy} -\title{Flatten Disease Hierarchy} -\usage{ -flatten_disease_hierarchy( - data, - disease_lookup, - grouping_columns = c("period_start_date", "period_end_date", "location"), - basal_diseases_to_prune = character(), - specials_pattern = "_unaccounted$" -) -} -\arguments{ -\item{data}{A tidy data set with the following minimal set of columns: -`disease`, `nesting_disease`, `period_start_date`, `period_end_date`, -and `location`. Note that the latter three can be modified with -`grouping_columns`.} - -\item{disease_lookup}{A lookup table with `disease` and `nesting_disease` -columns that describe a global disease hierarchy that will be applied -locally to flatten disease hierarchy at each point in time and space -in the tidy data set in the `data` argument.} - -\item{grouping_columns}{Character vector of column names to use when -grouping to determine the context.} - -\item{basal_diseases_to_prune}{Character vector of `disease`s to -remove from `data`.} - -\item{specials_pattern}{Optional regular expression to use to match -`disease` names in `data` that should be added to the lookup table. This -is useful for disease names that are not historical and produced for -harmonization purposes. The most common example is `"_unaccounted$"`, -which is the default. Setting this argument to `NULL` avoids adding -any special disease names to the lookup table.} -} -\description{ -Take a tidy data set with a potentially complex disease hierarchy -and flatten this hierarchy so that, at any particular time and location -(or some other context), all diseases in the `disease` column have the -same `nesting_disease`. -} diff --git a/R/iidda/man/get_implied_zeros.Rd b/R/iidda/man/get_implied_zeros.Rd deleted file mode 100644 index d645247..0000000 --- a/R/iidda/man/get_implied_zeros.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{get_implied_zeros} -\alias{get_implied_zeros} -\title{Get Implied Zeros} -\usage{ -get_implied_zeros(data) -} -\arguments{ -\item{data}{A tidy data set} -} -\value{ -A tidy data set with inferred 0s -} -\description{ -Add zeros to data set that are implied by a '0' reported at a coarser timescale. -} diff --git a/R/iidda/man/is_leaf_disease.Rd b/R/iidda/man/is_leaf_disease.Rd deleted file mode 100644 index d140485..0000000 --- a/R/iidda/man/is_leaf_disease.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{is_leaf_disease} -\alias{is_leaf_disease} -\title{Is Leaf Disease} -\usage{ -is_leaf_disease(disease, nesting_disease) -} -\arguments{ -\item{disease}{Disease name vector.} - -\item{nesting_disease}{Vector of the same length as \code{disease} giving -the nesting diseases of element in \code{disease}.} -} -\value{ -True if disease is never a nesting disease (it is a leaf disease), -False if disease is a nesting disease. -} -\description{ -Given a set of `disease`-`nesting_disease` pairs that all share the same -\code{\link{basal_disease}}, -} diff --git a/R/iidda/man/normalize_time_scales.Rd b/R/iidda/man/normalize_time_scales.Rd deleted file mode 100644 index d58d140..0000000 --- a/R/iidda/man/normalize_time_scales.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data_prep_tools.R -\name{normalize_time_scales} -\alias{normalize_time_scales} -\title{Normalize Time Scales} -\usage{ -normalize_time_scales( - data, - initial_group = c("year", "iso_3166", "iso_3166_2", "disease", "nesting_disease", - "basal_disease"), - final_group = c("basal_disease"), - get_implied_zeros = TRUE, - aggregate_if_unavailable = TRUE -) -} -\arguments{ -\item{data}{A tidy data set with a `time_scale` and `year` column} - -\item{initial_group}{Character vector naming columns for defining -the initial grouping used to compute the shortest time scales.} - -\item{final_group}{Character vector naming columns for defining the final -grouping used to compute the longest of the shortest time scales.} - -\item{get_implied_zeros}{Add zeros that are implied by a '0' reported at a coarser timescale.} - -\item{aggregate_if_unavailable}{If a location is not reporting for the determined -'best timescale', but is reporting at a finer timescale, aggregate this finer -timescale to the 'best timescale'} -} -\value{ -A data set only containing records with the optimal time scale. -} -\description{ -Choose a single best `time_scale` for each year in a dataset, grouped by -nesting disease. This best `time_scale` is defined as the longest -of the shortest time scales in each location and sub-disease. -}