Skip to content

Commit

Permalink
iidda-staging support
Browse files Browse the repository at this point in the history
  • Loading branch information
stevencarlislewalker committed Sep 14, 2024
1 parent 1829b2a commit 114c106
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 6 deletions.
2 changes: 1 addition & 1 deletion R/iidda/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: iidda
Title: Processing Infectious Disease Datasets in IIDDA.
Version: 0.3.1
Version: 0.4.0
Maintainer: Steve Walker <swalk@mcmaster.ca>
Authors@R:
c(person("Steve Walker", rol = c("aut", "cre"), email = "swalk@mcmaster.ca"),
Expand Down
2 changes: 2 additions & 0 deletions R/iidda/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export(make_source_directory)
export(make_source_metadata)
export(melt_tracking_table_keys)
export(memoise_remove_age)
export(mock_api_hook)
export(nlist)
export(non_numeric_report)
export(normalize_diseases)
Expand Down Expand Up @@ -131,6 +132,7 @@ export(save_result)
export(set_ext)
export(set_iidda_col_types)
export(set_types)
export(source_from_digitization_id)
export(sprintf_named)
export(statcan_mort_prep)
export(strip_blob_github)
Expand Down
12 changes: 8 additions & 4 deletions R/iidda/R/contribution_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ register_prep_script = function(script_path, type) {
|> basename()
|> tools::file_path_sans_ext()
)
derived_path = sprintf("derived-data/%s/%s.csv", dataset_id)
derived_path = sprintf("derived-data/%s/%s.csv", dataset_id, dataset_id)
lines = readLines(script_path)
p = data.frame(x = character())
quoted_frame = strcapture('"([^"]+)"', lines, proto = p)
quoted_frame = rbind(
strcapture('"([^"]+)"', lines, proto = p)
, strcapture("'([^']+)'", lines, proto = p)
)
quoted = (quoted_frame
|> unlist(use.names = FALSE)
|> na.omit()
Expand Down Expand Up @@ -60,12 +63,12 @@ register_prep_script = function(script_path, type) {
"the following dataset already exists so previously saved data is being used to inform the metadata:\n "
, derived_path
)
message()
} else {
message(
"creating the following dataset to potentially learn about metadata from the data:\n "
, derived_path
)
dir.create(dirname(derived_path), showWarnings = FALSE)
data_env = new.env(parent = parent.frame())
sys.source(script_path, envir = data_env)
rm(list = ls(data_env), envir = data_env)
Expand All @@ -91,6 +94,7 @@ register_prep_script = function(script_path, type) {
if (all(date_fields %in% fields)) {
periods = data[date_fields] |> unlist() |> range()
} else {
warning("The period_start_date and period_end_date fields are not in the data. This is fine, but it will require manual entry of these fields in the metadata for the derived dataset")
periods = rep("", times = 2L)
}

Expand Down Expand Up @@ -167,7 +171,7 @@ build_metadata_filepath = function(name, type) {
#' @param source Source ID.
#' @export
make_resource_metadata = function(source) {
types = c("prep-scripts", "access-scripts", "scans", "digitization")
types = c("prep-scripts", "access-scripts", "scans", "digitizations")
for (type in types) make_resource_metadata_type(source, type)
}

Expand Down
2 changes: 2 additions & 0 deletions R/iidda/R/lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ resource_type_dict = list(
, ACM = "All-Cause Mortality"
, Births = "Births"
, Plague = "Plague" ## TODO: should this really be a dataset type? too specific to LBoM?
, Schedule = "Schedule"
, CDI_CANMOD = "CANMOD CDI"
, CDI_CANMOD_NORMALIZED = "CANMOD CDI NORMALIZED"
, Compilation = "Compilation"
, Check = "Check"
)

#' Frequency to By
Expand Down
29 changes: 29 additions & 0 deletions R/iidda/R/mock_api_hook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Mock API Hook
#'
#' Build a list of functions that read files from a local copy of an IIDDA
#' repository.
#'
#' @param repo_path Path to an IIDDA repository.
#'
#' @return A list with three functions:
#' \describe{
#'   \item{raw_csv}{Takes `dataset_ids`, reads
#'     `derived-data/<id>/<id>.csv` under `repo_path` for each ID with
#'     `read_data_frame`, and row-binds the results with a `dataset_id`
#'     column identifying where each row came from.}
#'   \item{lookup_tables}{Takes `lookup_type` (default `"location"`) and
#'     reads `lookup-tables/<lookup_type>.csv` under `repo_path`.}
#'   \item{metadata}{Always throws an error, because metadata is not
#'     available through the mock.}
#' }
#'
#' @export
mock_api_hook = function(repo_path) {

  # Read every requested derived dataset and stack them into one data frame.
  read_derived_datasets = function(dataset_ids, ...) {
    relative_paths = sprintf("derived-data/%s/%s.csv", dataset_ids, dataset_ids)
    full_paths = file.path(repo_path, relative_paths)
    # NOTE: iidda.api:::parse_columns is not applied to the frames read here.
    frames = setNames(lapply(full_paths, read_data_frame), dataset_ids)
    dplyr::bind_rows(frames, .id = "dataset_id")
  }

  # Read a single lookup table by its type name.
  read_lookup_table = function(lookup_type = "location") {
    relative_path = sprintf("lookup-tables/%s.csv", lookup_type)
    read_data_frame(file.path(repo_path, relative_path))
  }

  # Metadata cannot be served from local files, so fail loudly.
  refuse_metadata = function(...) {
    stop("need to use the real api when getting metadata. cannot use this mock api")
  }

  list(
      raw_csv = read_derived_datasets
    , lookup_tables = read_lookup_table
    , metadata = refuse_metadata
  )
}
17 changes: 16 additions & 1 deletion R/iidda/R/repo_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -476,4 +476,19 @@ error_tar = function(tar_name) {
tar(tarfile = tar_path, files = f)
}


#' Source from Digitization ID
#'
#' For each digitization ID, find the files matching
#' `pipelines/*/digitizations/<id>.*.json` relative to the current working
#' directory and return the name of the directory directly under `pipelines`
#' that contains them.
#'
#' @param digitization_ids Character vector of digitization IDs
#' @return Character vector of source IDs associated with digitization. It has
#' one element per element of `digitization_ids`, named by the corresponding
#' digitization ID. An error is thrown if any ID is not associated with
#' exactly one source.
#' @export
source_from_digitization_id = function(digitization_ids) {
  u = unique(digitization_ids)

  # Resolve a single digitization ID to its one and only source ID.
  find_source = function(id) {
    pattern = sprintf("pipelines/*/digitizations/%s.*.json", id)
    # A digitization can have several matching files in the same source
    # directory (e.g. id.csv.json and id.xlsx.json), so collapse the matches
    # to the distinct sources before checking for ambiguity.
    sources = unique(basename(dirname(dirname(Sys.glob(pattern)))))
    if (length(sources) != 1L) {
      stop(
          sprintf(
              "expected exactly one source for digitization ID '%s' but found %d (pattern: %s)"
            , id, length(sources), pattern
          )
        , call. = FALSE
      )
    }
    sources
  }

  # Look each distinct ID up once, then expand back out to the input order.
  l = setNames(
      vapply(u, find_source, character(1L), USE.NAMES = FALSE)
    , u
  )
  l[digitization_ids]
}
14 changes: 14 additions & 0 deletions R/iidda/man/mock_api_hook.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions R/iidda/man/source_from_digitization_id.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 114c106

Please sign in to comment.