iidda-staging support

canmod · Sep 14, 2024 · 114c106 · 114c106
1 parent 1829b2a
commit 114c106
Show file tree

Hide file tree

Showing 8 changed files with 89 additions and 6 deletions.
diff --git a/R/iidda/DESCRIPTION b/R/iidda/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: iidda
 Title: Processing Infectious Disease Datasets in IIDDA.
-Version: 0.3.1
+Version: 0.4.0
 Maintainer: Steve Walker <swalk@mcmaster.ca>
 Authors@R: 
   c(person("Steve Walker", rol = c("aut", "cre"), email = "swalk@mcmaster.ca"),

diff --git a/R/iidda/NAMESPACE b/R/iidda/NAMESPACE
@@ -92,6 +92,7 @@ export(make_source_directory)
 export(make_source_metadata)
 export(melt_tracking_table_keys)
 export(memoise_remove_age)
+export(mock_api_hook)
 export(nlist)
 export(non_numeric_report)
 export(normalize_diseases)
@@ -131,6 +132,7 @@ export(save_result)
 export(set_ext)
 export(set_iidda_col_types)
 export(set_types)
+export(source_from_digitization_id)
 export(sprintf_named)
 export(statcan_mort_prep)
 export(strip_blob_github)

diff --git a/R/iidda/R/contribution_helpers.R b/R/iidda/R/contribution_helpers.R
@@ -24,10 +24,13 @@ register_prep_script = function(script_path, type) {
     |> basename()
     |> tools::file_path_sans_ext()
   )
-  derived_path = sprintf("derived-data/%s/%s.csv", dataset_id)
+  derived_path = sprintf("derived-data/%s/%s.csv", dataset_id, dataset_id)
   lines = readLines(script_path)
   p = data.frame(x = character())
-  quoted_frame = strcapture('"([^"]+)"', lines, proto = p)
+  quoted_frame = rbind(
+      strcapture('"([^"]+)"', lines, proto = p)
+    , strcapture("'([^']+)'", lines, proto = p)
+  )
   quoted = (quoted_frame
     |> unlist(use.names = FALSE)
     |> na.omit()
@@ -60,12 +63,12 @@ register_prep_script = function(script_path, type) {
         "the following dataset already exists so previously saved data is being used to inform the metadata:\n    "
       , derived_path
     )
-    message()
   } else {
     message(
         "creating the following dataset to potentially learn about metadata from the data:\n    "
       , derived_path
     )
+    dir.create(dirname(derived_path), showWarnings = FALSE)
     data_env = new.env(parent = parent.frame())
     sys.source(script_path, envir = data_env)
     rm(list = ls(data_env), envir = data_env)
@@ -91,6 +94,7 @@ register_prep_script = function(script_path, type) {
   if (all(date_fields %in% fields)) {
     periods = data[date_fields] |> unlist() |> range()
   } else {
+    warning("The period_start_date and period_end_date fields are not in the data. This is fine, but it will require manual entry of these fields in the metadata for the derived dataset")
     periods = rep("", times = 2L)
   }
 
@@ -167,7 +171,7 @@ build_metadata_filepath = function(name, type) {
 #' @param source Source ID.
 #' @export
 make_resource_metadata = function(source) {
-  types = c("prep-scripts", "access-scripts", "scans", "digitization")
+  types = c("prep-scripts", "access-scripts", "scans", "digitizations")
   for (type in types) make_resource_metadata_type(source, type)
 }
 

diff --git a/R/iidda/R/lookup.R b/R/iidda/R/lookup.R
@@ -29,9 +29,11 @@ resource_type_dict = list(
   , ACM = "All-Cause Mortality"
   , Births = "Births"
   , Plague = "Plague" ## TODO: should this really be a dataset type? too specific to LBoM?
+  , Schedule = "Schedule"
   , CDI_CANMOD = "CANMOD CDI"
   , CDI_CANMOD_NORMALIZED = "CANMOD CDI NORMALIZED"
   , Compilation = "Compilation"
+  , Check = "Check"
 )
 
 #' Frequency to By

diff --git a/R/iidda/R/mock_api_hook.R b/R/iidda/R/mock_api_hook.R
@@ -0,0 +1,29 @@
+#' Mock API Hook
+#'
+#' Build a list of functions that stand in for API calls by reading CSV
+#' files located under a local IIDDA repository.
+#'
+#' @param repo_path Path to an IIDDA repository.
+#'
+#' @return A list with three functions: `raw_csv`, which reads
+#' `derived-data/<id>/<id>.csv` for every requested dataset ID and row-binds
+#' the results with a `dataset_id` identifier column; `lookup_tables`, which
+#' reads `lookup-tables/<lookup_type>.csv`; and `metadata`, which always
+#' throws an error.
+#'
+#' @export
+mock_api_hook = function(repo_path) {
+
+  # Read one CSV per dataset ID and stack them, tagging every row with the
+  # ID of the dataset it came from. Columns are kept as read; they are not
+  # passed through iidda.api:::parse_columns.
+  raw_csv = function(dataset_ids, ...) {
+    csv_files = file.path(
+        repo_path
+      , sprintf("derived-data/%s/%s.csv", dataset_ids, dataset_ids)
+    )
+    frames = setNames(lapply(csv_files, read_data_frame), dataset_ids)
+    dplyr::bind_rows(frames, .id = "dataset_id")
+  }
+
+  # Read a single lookup table, selected by its type.
+  lookup_tables = function(lookup_type = "location") {
+    read_data_frame(
+      file.path(repo_path, sprintf("lookup-tables/%s.csv", lookup_type))
+    )
+  }
+
+  # Always fails: the mock cannot serve metadata.
+  metadata = function(...) {
+    stop("need to use the real api when getting metadata. cannot use this mock api")
+  }
+
+  list(raw_csv = raw_csv, lookup_tables = lookup_tables, metadata = metadata)
+}
diff --git a/R/iidda/R/repo_paths.R b/R/iidda/R/repo_paths.R
@@ -476,4 +476,19 @@ error_tar = function(tar_name) {
   tar(tarfile = tar_path, files = f)
 }
 
-
+#' Source from Digitization ID
+#'
+#' Look up the source that each digitization belongs to by searching for
+#' files matching `pipelines/<source>/digitizations/<digitization_id>.*.json`
+#' relative to the current working directory.
+#'
+#' @param digitization_ids Character vector of digitization IDs
+#' @return Character vector of source IDs associated with digitization, in
+#' the same order as `digitization_ids` and named by digitization ID. An
+#' error naming the offending IDs is thrown if any ID does not match exactly
+#' one file.
+#' @export
+source_from_digitization_id = function(digitization_ids) {
+  # Glob once per distinct ID; duplicates are expanded again at the end.
+  u = unique(digitization_ids)
+  matches = lapply(
+      sprintf("pipelines/*/digitizations/%s.*.json", u)
+    , Sys.glob
+  )
+
+  # An ID with zero or several matching files has no well-defined source,
+  # so fail with a message that says which IDs are the problem.
+  n_matches = lengths(matches)
+  if (any(n_matches != 1L)) {
+    stop(
+        "each digitization ID must match exactly one file of the form "
+      , "pipelines/*/digitizations/<id>.*.json, but these IDs did not:\n    "
+      , paste(u[n_matches != 1L], collapse = ", ")
+    )
+  }
+
+  # as.character() keeps the empty-input case a character(0), not NULL.
+  paths = as.character(unlist(matches, use.names = FALSE))
+
+  # The source ID is the directory two levels above the matched file.
+  l = setNames(
+      paths |> dirname() |> dirname() |> basename()
+    , u
+  )
+  l[digitization_ids]
+}
diff --git a/R/iidda/man/mock_api_hook.Rd b/R/iidda/man/mock_api_hook.Rd
diff --git a/R/iidda/man/source_from_digitization_id.Rd b/R/iidda/man/source_from_digitization_id.Rd