Skip to content

Commit

Permalink
Merge pull request #1011 from ropensci/968
Browse files Browse the repository at this point in the history
Encapsulation the hash tables
  • Loading branch information
wlandau-lilly authored Sep 13, 2019
2 parents 47169aa + 8112b2f commit 9700619
Show file tree
Hide file tree
Showing 35 changed files with 409 additions and 326 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- Wrap up console and text file logging functionality into a reference class (#964).
- Deprecate the `verbose` argument in various caching functions. The location of the cache is now only printed in `make()`. This made the previous feature easier to implement.
- Carry forward nested grouping variables in `combine()` (#1008).
- Improve the encapsulation of hash tables in the decorated `storr` (#968).


# Version 7.6.1
Expand Down
8 changes: 4 additions & 4 deletions R/analyze_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,22 @@ analyze_knitr_in <- function(expr, results) {
expr <- ignore_ignore(expr)
files <- analyze_strings(expr[-1])
lapply(files, analyze_knitr_file, results = results)
ht_set(results$knitr_in, encode_path(files))
ht_set(results$knitr_in, reencode_path(files))
}

analyze_file_in <- function(expr, results) {
expr <- ignore_ignore(expr)
x <- analyze_strings(expr[-1])
x <- file.path(x)
x <- encode_path(x)
x <- reencode_path(x)
ht_set(results$file_in, x)
}

analyze_file_out <- function(expr, results) {
expr <- ignore_ignore(expr)
x <- analyze_strings(expr[-1])
x <- file.path(x)
x <- encode_path(x)
x <- reencode_path(x)
ht_set(results$file_out, x)
}

Expand All @@ -118,7 +118,7 @@ analyze_knitr_file <- function(file, results) {
analyze_namespaced <- function(expr, results, locals, allowed_globals) {
x <- safe_deparse(expr)
if (!ht_exists(locals, x)) {
ht_set(results$namespaced, encode_namespaced(x))
ht_set(results$namespaced, reencode_namespaced(x))
}
}

Expand Down
17 changes: 3 additions & 14 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ cached <- function(
if (targets_only) {
targets <- targets_only(targets, cache, jobs)
}
display_keys(targets)
redisplay_keys(targets)
}

targets_only <- function(targets, cache, jobs) {
Expand Down Expand Up @@ -1031,7 +1031,7 @@ drake_cache_log <- function(
if (targets_only) {
out <- out[out$type == "target", ]
}
out$name <- display_keys(out$name)
out$name <- redisplay_keys(out$name)
out
}

Expand Down Expand Up @@ -1314,18 +1314,7 @@ progress <- function(
}

get_progress_single <- function(target, cache) {
if (cache$exists(key = target, namespace = "progress")) {
hash <- cache$get_hash(key = target, namespace = "progress")
switch(
substr(hash, 1, 1),
r = "running",
d = "done",
f = "failed",
NA_character_
)
} else{
"none"
}
cache$get_progress(target = target)
}

memo_expr <- function(expr, cache, ...) {
Expand Down
2 changes: 1 addition & 1 deletion R/clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ clean_single_target <- function(
}
}
if (garbage_collection && length(files)) {
unlink(decode_path(files), recursive = TRUE)
unlink(redecode_path(files), recursive = TRUE)
}
}

Expand Down
4 changes: 2 additions & 2 deletions R/create_drake_graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ create_drake_graph <- function(
jobs,
logger
) {
config <- list(plan = plan, jobs = jobs, logger = logger)
config <- list(plan = plan, jobs = jobs, logger = logger, cache = cache)
edges <- memo_expr(
cdg_create_edges(config, layout),
cache,
Expand Down Expand Up @@ -78,7 +78,7 @@ cdg_edges_thru_file_out <- function(edges, config) {
}

cdg_transitive_edges <- function(vertex, edges, config) {
config$logger$minor("file_out", target = display_key(vertex, config))
config$logger$minor("file_out", target = config$cache$display_keys(vertex))
from <- unique(edges$from[edges$to == vertex])
to <- unique(edges$to[edges$from == vertex])
expand.grid(from = from, to = to, stringsAsFactors = FALSE)
Expand Down
189 changes: 189 additions & 0 deletions R/decorate_storr.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,47 @@ refclass_decorated_storr <- methods::setRefClass(
set = function(key, value, ...) {
dcst_set(value = value, key = key, ..., .self = .self)
},
memo_hash = function(x, fun, ...) {
ht_memo(ht = .self$ht_hash, x = x, fun = fun, ...)
},
reset_memo_hash = function() {
ht_clear(.self$ht_hash)
},
reset_ht_hash = function() {
# deprecated on 2019-09-13
ht_clear(.self$ht_hash)
},
encode_path = function(x) {
ht_memo(ht = .self$ht_encode_path, x = x, fun = reencode_path)
},
decode_path = function(x) {
ht_memo(ht = .self$ht_decode_path, x = x, fun = redecode_path)
},
encode_namespaced = function(x) {
ht_memo(ht = .self$ht_encode_namespaced, x = x, fun = reencode_namespaced)
},
decode_namespaced = function(x) {
ht_memo(ht = .self$ht_decode_namespaced, x = x, fun = redecode_namespaced)
},
display_keys = function(x) {
vapply(
X = x,
FUN = display_key,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
.self = .self
)
},
set_progress = function(target, value) {
.self$driver$set_hash(
key = target,
namespace = "progress",
hash = .self$ht_progress[[value]]
)
},
get_progress = function(target) {
retrieve_progress(target = target, cache = .self)
},
# Delegate to storr:
archive_export = function(...) .self$storr$archive_export(...),
archive_import = function(...) .self$storr$archive_import(...),
Expand Down Expand Up @@ -257,3 +295,154 @@ dir_create <- function(x) {
}
invisible()
}

#' @title Show a file's encoded representation in the cache
#' \lifecycle{stable}
#' @description This function simply wraps literal double quotes around
#' the argument `x` so `drake` knows it is the name of a file.
#' Use when you are calling functions like `deps_code()`: for example,
#' `deps_code(file_store("report.md"))`. See the examples for details.
#' Internally, `drake` wraps the names of file targets/imports
#' inside literal double quotes to avoid confusion between
#' files and generic R objects.
#' @export
#' @return A single-quoted character string: i.e., a filename
#' understandable by drake.
#' @param x Character string to be turned into a filename
#' understandable by drake (i.e., a string with literal
#' single quotes on both ends).
#' @examples
#' # Wraps the string in single quotes.
#' file_store("my_file.rds") # "'my_file.rds'"
#' \dontrun{
#' isolate_example("contain side effects", {
#' if (suppressWarnings(require("knitr"))) {
#' load_mtcars_example() # Get the code with drake_example("mtcars").
#' make(my_plan) # Run the workflow to build the targets
#' list.files() # Should include input "report.Rmd" and output "report.md".
#' head(readd(small)) # You can use symbols for ordinary objects.
#' # But if you want to read cached info on files, use `file_store()`.
#' readd(file_store("report.md"), character_only = TRUE) # File fingerprint.
#' deps_code(file_store("report.Rmd"))
#' config <- drake_config(my_plan)
#' deps_profile(
#' file_store("report.Rmd"),
#' config = config,
#' character_only = TRUE
#' )
#' }
#' })
#' }
file_store <- function(x) {
reencode_path(x)
}

display_key <- function(x, .self) {
if (is_encoded_path(x)) {
display_path(x = x, .self = .self)
} else if (is_encoded_namespaced(x)) {
sprintf("%s", .self$decode_namespaced(x = x))
} else {
x
}
}

display_path <- function(x, .self) {
path_ <- .self$decode_path(x = x)
if (is_url(path_)) {
sprintf("url %s", path_)
} else {
sprintf("file %s", path_)
}
}

redisplay_keys <- function(x) {
vapply(
X = x,
FUN = redisplay_key,
FUN.VALUE = character(1),
USE.NAMES = FALSE
)
}

redisplay_key <- function(x) {
if (is_encoded_path(x)) {
redisplay_path(x = x)
} else if (is_encoded_namespaced(x)) {
sprintf("%s", redecode_namespaced(x = x))
} else {
x
}
}

redisplay_path <- function(x) {
path <- redecode_path(x = x)
if (is_url(path)) {
sprintf("url %s", path)
} else {
sprintf("file %s", path)
}
}

is_encoded_path <- function(x) {
substr(x = x, start = 1, stop = 2) == "p-"
}

is_encoded_namespaced <- function(x) {
substr(x = x, start = 1, stop = 2) == "n-"
}

reencode_path <- function(x) {
y <- base64url::base32_encode(x = x, use.padding = FALSE)
sprintf("p-%s", y)
}

redecode_path <- function(x) {
y <- substr(x = x, start = 3, stop = 1e6)
base64url::base32_decode(x = y, use.padding = FALSE)
}

reencode_namespaced <- function(x) {
y <- base64url::base32_encode(x, use.padding = FALSE)
sprintf("n-%s", y)
}

redecode_namespaced <- redecode_path

standardize_key <- function(text) {
if (any(grepl("::", text))) {
text <- reencode_namespaced(text)
}
text
}

ht_progress <- function(hash_algorithm) {
keys <- c("running", "done", "failed")
out <- lapply(keys, progress_hash, hash_algorithm = hash_algorithm)
names(out) <- keys
out
}

progress_hash <- function(key, hash_algorithm) {
out <- digest::digest(
key,
algo = hash_algorithm,
serialize = FALSE
)
gsub("^.", substr(key, 1, 1), out)
}

retrieve_progress <- function(target, cache) {
if (cache$exists(key = target, namespace = "progress")) {
hash <- cache$get_hash(key = target, namespace = "progress")
switch(
substr(hash, 1, 1),
r = "running",
d = "done",
f = "failed",
NA_character_
)
} else{
"none"
}
}
4 changes: 2 additions & 2 deletions R/deprecated.R
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ built <- function(
},
jobs = jobs
)
display_keys(out)
redisplay_keys(out)
}

#' @title Search up the file system
Expand Down Expand Up @@ -1120,7 +1120,7 @@ imported <- function(
)
if (files_only)
targets <- parallel_filter(targets, f = is_encoded_path, jobs = jobs)
display_keys(targets)
redisplay_keys(targets)
}

#' @title Prune the graph
Expand Down
8 changes: 4 additions & 4 deletions R/deps.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ get_deps_knitr <- function(target) {
}
out <- new_code_analysis_results()
if (is_encoded_path(target)) {
target <- decode_path(target)
target <- redecode_path(target)
}
analyze_knitr_file(target, out)
list_code_analysis_results(out)
Expand All @@ -118,11 +118,11 @@ get_deps_knitr <- function(target) {
decode_deps_list <- function(x) {
for (field in c("file_in", "file_out", "knitr_in")) {
if (length(x[[field]])) {
x[[field]] <- decode_path(x[[field]])
x[[field]] <- redecode_path(x[[field]])
}
}
if (length(x$namespaced)) {
x$namespaced <- decode_namespaced(x$namespaced)
x$namespaced <- redecode_namespaced(x$namespaced)
}
x
}
Expand Down Expand Up @@ -268,7 +268,7 @@ tracked <- function(config) {
},
jobs = config$jobs_preprocess
)
display_keys(clean_dependency_list(out), config)
config$cache$display_keys(clean_dependency_list(out))
}

clean_dependency_list <- function(x) {
Expand Down
16 changes: 0 additions & 16 deletions R/drake_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -752,22 +752,6 @@ get_previous_seed <- function(cache) {
}
}

ht_progress <- function(hash_algorithm) {
keys <- c("running", "done", "failed")
out <- lapply(keys, progress_hash, hash_algorithm = hash_algorithm)
names(out) <- keys
out
}

progress_hash <- function(key, hash_algorithm) {
out <- digest::digest(
key,
algo = hash_algorithm,
serialize = FALSE
)
gsub("^.", substr(key, 1, 1), out)
}

initialize_history <- function(history, cache) {
migrate_history(history, cache)
if (identical(history, TRUE)) {
Expand Down
Loading

0 comments on commit 9700619

Please sign in to comment.