diff --git a/R/api-dsl.R b/R/api-dsl.R index 841f2364a..150bd69b1 100644 --- a/R/api-dsl.R +++ b/R/api-dsl.R @@ -16,7 +16,8 @@ #' @seealso [drake_plan()] #' @return A transformed workflow plan data frame #' @param plan Workflow plan data frame with a column for targets, -#' a column for commands, and a column for transformations. +#' a column for commands, a column for transformations, +#' and a column for optional grouping variables. #' @param trace Logical, whether to add columns to show #' what happened during target transformations, e.g. #' `drake_plan(x = target(..., transform = ...), transform = TRUE)`. @@ -24,7 +25,7 @@ #' plan1 <- drake_plan( #' analysis = target( #' analyze_data("source"), -#' transform = cross(source = c(source1, source2)) +#' transform = map(source = c(source1, source2)) # cross() would work too #' ), #' transform = FALSE #' ) @@ -66,7 +67,7 @@ transform_row <- function(plan, row) { post_hoc_groups <- parse_group(plan[["group"]][[row]]) transform <- parse_transform(plan$transform[[row]], plan) new_cols <- c(target, post_hoc_groups, group_names(transform)) - check_groupings(new_cols, old_cols(plan)) + check_group_names(new_cols, old_cols(plan)) out <- dsl_transform(transform, target, command, plan) out[[target]] <- out$target old_cols <- setdiff( @@ -82,12 +83,12 @@ transform_row <- function(plan, row) { out } -dsl_transform.cross <- function(transform, target, command, plan) { +map_to_grid <- function(transform, target, command, plan) { groupings <- groupings(transform) if (!length(groupings)) { return(dsl_default_df(target, command)) } - grid <- do.call(expand.grid, c(groupings, stringsAsFactors = FALSE)) + grid <- dsl_grid(transform, groupings) ncl <- c(names(new_groupings(transform)), "target", "command", "transform") plan <- plan[, setdiff(colnames(plan), ncl), drop = FALSE] grid <- join_protect_x(grid, plan) @@ -102,6 +103,17 @@ dsl_transform.cross <- function(transform, target, command, plan) { cbind(out, grid) } +dsl_grid <- function(...) UseMethod("dsl_grid") + +dsl_grid.cross <- function(transform, groupings) { + do.call(expand.grid, c(groupings, stringsAsFactors = FALSE)) +} + +dsl_grid.map <- function(transform, groupings) { + check_map_groupings(transform, groupings) + as.data.frame(groupings) +} + grid_commands <- function(command, grid) { grid <- grid[, intersect(symbols(command), colnames(grid)), drop = FALSE] for (i in seq_along(grid)) { @@ -127,6 +139,12 @@ new_targets <- function(target, grid) { make.names(paste(target, apply(grid, 1, paste, collapse = "_"), sep = "_")) } +dsl_transform <- function(...) { + UseMethod("dsl_transform") +} + +dsl_transform.cross <- dsl_transform.map <- map_to_grid + dsl_transform.reduce <- function(transform, target, command, plan) { command_symbols <- intersect(symbols(command), colnames(plan)) keep <- complete_cases(plan[, command_symbols, drop = FALSE]) @@ -264,10 +282,6 @@ parse_group.default <- function(group) { all.vars(group, functions = FALSE) } -dsl_transform <- function(...) { - UseMethod("dsl_transform") -} - dsl_syms <- function(x) { out <- lapply(as.character(x), dsl_sym) } @@ -289,7 +303,18 @@ dsl_default_df <- function(target, command) { ) } -check_groupings <- function(groups, protect) { +check_map_groupings <- function(transform, groupings) { + n <- unique(vapply(groupings, length, FUN.VALUE = integer(1))) + if (length(n) > 1) { + stop( + "uneven groupings in ", char(transform), ":\n", + multiline_message(groupings), + call. = FALSE + ) + } +} + +check_group_names <- function(groups, protect) { groups <- intersect(groups, protect) if (length(groups)) { stop( diff --git a/R/api-plan.R b/R/api-plan.R index 821ceb7b0..dfc1b2b36 100644 --- a/R/api-plan.R +++ b/R/api-plan.R @@ -95,11 +95,13 @@ #' # check your workflow with `vis_drake_graph()` #' # before running `make()`. #' drake_plan( -#' small = simulate(48), -#' large = simulate(64), +#' data = target( +#' simulate(nrows), +#' transform = map(nrows = c(48, 64)) +#' ), #' reg = target( #' reg_fun(data), -#' transform = cross(reg_fun = c(reg1, reg2), data = c(small, large)) +#' transform = cross(reg_fun = c(reg1, reg2), data) #' ), #' summ = target( #' sum_fun(data, reg), @@ -118,7 +120,7 @@ #' large = simulate(64), #' reg1 = target( #' reg_fun(data), -#' transform = cross(data = c(small, large)), +#' transform = map(data = c(small, large)), #' group = reg #' ), #' reg2 = target( diff --git a/man/drake_plan.Rd b/man/drake_plan.Rd index f45c7768f..3472142a2 100644 --- a/man/drake_plan.Rd +++ b/man/drake_plan.Rd @@ -119,11 +119,13 @@ deps_code("report.Rmd") # check your workflow with `vis_drake_graph()` # before running `make()`. drake_plan( - small = simulate(48), - large = simulate(64), + data = target( + simulate(nrows), + transform = map(nrows = c(48, 64)) + ), reg = target( reg_fun(data), - transform = cross(reg_fun = c(reg1, reg2), data = c(small, large)) + transform = cross(reg_fun = c(reg1, reg2), data) ), summ = target( sum_fun(data, reg), @@ -142,7 +144,7 @@ drake_plan( large = simulate(64), reg1 = target( reg_fun(data), - transform = cross(data = c(small, large)), + transform = map(data = c(small, large)), group = reg ), reg2 = target( diff --git a/man/transform_plan.Rd b/man/transform_plan.Rd index 188412a74..e9b06377c 100644 --- a/man/transform_plan.Rd +++ b/man/transform_plan.Rd @@ -8,7 +8,8 @@ transform_plan(plan, trace = FALSE) } \arguments{ \item{plan}{Workflow plan data frame with a column for targets, -a column for commands, and a column for transformations.} +a column for commands, a column for transformations, +and a column for optional grouping variables.} \item{trace}{Logical, whether to add columns to show what happened during target transformations, e.g. @@ -37,7 +38,7 @@ and and want to combine and transform them later. plan1 <- drake_plan( analysis = target( analyze_data("source"), - transform = cross(source = c(source1, source2)) + transform = map(source = c(source1, source2)) # cross() would work too ), transform = FALSE ) diff --git a/tests/testthat/test-dsl.R b/tests/testthat/test-dsl.R index b58d4283e..ae9994d54 100644 --- a/tests/testthat/test-dsl.R +++ b/tests/testthat/test-dsl.R @@ -3,9 +3,10 @@ drake_context("dsl") test_with_dir("empty transformations", { out <- drake_plan( a = target(x, transform = cross()), - b = target(y, transform = reduce()) + b = target(y, transform = reduce()), + c = target(z, transform = map()) ) - exp <- drake_plan(a = x, b = y) + exp <- drake_plan(a = x, b = y, c = z) equivalent_plans(out, exp) }) @@ -15,6 +16,20 @@ test_with_dir("simple expansion", { expect_equal(plan$command, rep("1 + 1", 2)) }) +test_with_dir("simple map", { + plan <- drake_plan(a = target(1 + 1, transform = map(x = c(1, 2)))) + expect_equal(sort(plan$target), sort(c("a_1", "a_2"))) + expect_equal(plan$command, rep("1 + 1", 2)) +}) + +test_with_dir("simple map with 2 factors", { + plan <- drake_plan( + a = target(1 + 1, transform = map(x = c(1, 2), y = c(3, 4))) + ) + expect_equal(sort(plan$target), sort(c("a_1_3", "a_2_4"))) + expect_equal(plan$command, rep("1 + 1", 2)) +}) + test_with_dir("all new crossings", { out <- drake_plan( analysis = target( @@ -29,17 +44,47 @@ test_with_dir("all new crossings", { equivalent_plans(out, exp) }) +test_with_dir("1 new map", { + out <- drake_plan( + analysis = target( + analyze_data(source), + transform = map(source = c(source1, source2)) + ) + ) + exp <- drake_plan( + analysis_source1 = analyze_data(source1), + analysis_source2 = analyze_data(source2) + ) + equivalent_plans(out, exp) +}) + +test_with_dir("2 new maps", { + out <- drake_plan( + analysis = target( + analyze_data(source, set), + transform = map(source = c(source1, source2), set = c(set1, set2)) + ) + ) + exp <- drake_plan( + analysis_source1_set1 = analyze_data(source1, set1), + analysis_source2_set2 = analyze_data(source2, set2) + ) + equivalent_plans(out, exp) +}) + test_with_dir("groups and command symbols are undefined", { out <- drake_plan( small = simulate(48), large = simulate(64), lots = target(nobody(home), transform = cross(a, b)), + mots = target(everyone(out), transform = map(c, d)), winners = target(min(nobodyhome), transform = reduce(data)) ) exp <- drake_plan( small = simulate(48), large = simulate(64), lots = nobody(home), + mots = everyone(out), winners = min(nobodyhome) ) equivalent_plans(out, exp) @@ -150,6 +195,131 @@ test_with_dir("dsl with the mtcars plan", { equivalent_plans(out, exp) }) +test_with_dir("more map", { + out <- drake_plan( + small = simulate(48), + large = simulate(64), + reg = target( + reg_fun(data), + transform = map(reg_fun = c(reg1, reg2), data = c(small, large)) + ), + summ = target( + sum_fun(data, reg), + transform = map(sum_fun = c(coef, residuals), reg), + custom1 = 123L + ), + winners = target( + min(summ), + transform = reduce(sum_fun, data), + custom2 = 456L + ) + ) + exp <- drake_plan( + small = simulate(48), + large = simulate(64), + reg_reg1_small = reg1(small), + reg_reg2_large = reg2(large), + summ_coef_reg_reg1_small = target( + command = coef(small, reg_reg1_small), + custom1 = 123L + ), + summ_residuals_reg_reg2_large = target( + command = residuals(large, reg_reg2_large), + custom1 = 123L + ), + winners_residuals_large = target( + command = min( + list(summ_residuals_reg_reg2_large = summ_residuals_reg_reg2_large)), + custom2 = 456L + ), + winners_coef_small = target( + command = min( + list(summ_coef_reg_reg1_small = summ_coef_reg_reg1_small) + ), + custom2 = 456L + ) + ) + equivalent_plans(out, exp) +}) + +test_with_dir("map on mtcars-like workflow", { + out <- drake_plan( + data = target( + simulate(nrows), + transform = map(nrows = c(48, 64)) + ), + reg = target( + reg_fun(data), + transform = cross(reg_fun = c(reg1, reg2), data) + ), + summ = target( + sum_fun(data, reg), + transform = cross(sum_fun = c(coef, resid), reg) + ), + winners = target( + min(summ), + transform = reduce(data, sum_fun) + ) + ) + exp <- drake_plan( + data_48 = simulate(48), + data_64 = simulate(64), + reg_reg1_data_48 = reg1(data_48), + reg_reg2_data_48 = reg2(data_48), + reg_reg1_data_64 = reg1(data_64), + reg_reg2_data_64 = reg2(data_64), + summ_coef_reg_reg1_data_48 = coef(data_48, reg_reg1_data_48), + summ_resid_reg_reg1_data_48 = resid(data_48, reg_reg1_data_48), + summ_coef_reg_reg1_data_64 = coef(data_64, reg_reg1_data_64), + summ_resid_reg_reg1_data_64 = resid(data_64, reg_reg1_data_64), + summ_coef_reg_reg2_data_48 = coef(data_48, reg_reg2_data_48), + summ_resid_reg_reg2_data_48 = resid(data_48, reg_reg2_data_48), + summ_coef_reg_reg2_data_64 = coef(data_64, reg_reg2_data_64), + summ_resid_reg_reg2_data_64 = resid(data_64, reg_reg2_data_64), + winners_data_48_coef = min(list( + summ_coef_reg_reg1_data_48 = summ_coef_reg_reg1_data_48, + summ_coef_reg_reg2_data_48 = summ_coef_reg_reg2_data_48 + )), + winners_data_64_coef = min(list( + summ_coef_reg_reg1_data_64 = summ_coef_reg_reg1_data_64, + summ_coef_reg_reg2_data_64 = summ_coef_reg_reg2_data_64 + )), + winners_data_48_resid = min(list( + summ_resid_reg_reg1_data_48 = summ_resid_reg_reg1_data_48, + summ_resid_reg_reg2_data_48 = summ_resid_reg_reg2_data_48 + )), + winners_data_64_resid = min(list( + summ_resid_reg_reg1_data_64 = summ_resid_reg_reg1_data_64, + summ_resid_reg_reg2_data_64 = summ_resid_reg_reg2_data_64 + )) + ) + equivalent_plans(out, exp) +}) + +test_with_dir("map with unequal columns", { + expect_error( + drake_plan( + small = simulate(48), + large = simulate(64), + reg = target( + reg_fun(data), + transform = map(reg_fun = c(reg1, reg2), data = c(small, large, huge)) + ), + summ = target( + sum_fun(data, reg), + transform = map(sum_fun = c(coef, residuals), reg), + custom1 = 123L + ), + winners = target( + min(summ), + transform = reduce(sum_fun, data), + custom2 = 456L + ) + ), + regexp = "uneven groupings in map" + ) +}) + test_with_dir("dsl and custom columns", { e <- quote( drake_plan(