From 3497c088c98ea941ae856fdc65376beae8498874 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 23 Sep 2021 11:50:48 -0600 Subject: [PATCH 1/2] Fingerprint indices instead of entire data object --- R/rset.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/rset.R b/R/rset.R index a44ec4a9..f5d11531 100644 --- a/R/rset.R +++ b/R/rset.R @@ -74,7 +74,8 @@ new_rset <- function(splits, ids, attrib = NULL, res <- add_class(res, cls = subclass) } - fingerprint <- rlang::hash(res) + fingerprint <- list(map(splits$splits, "in_id"), map(splits$splits, "out_id")) + fingerprint <- rlang::hash(fingerprint) attr(res, "fingerprint") <- fingerprint res From 87ffd6dbd068fc4490c28702749a83ed1e6e0516 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 23 Sep 2021 12:25:10 -0600 Subject: [PATCH 2/2] Only map through splits once, update NEWS --- NEWS.md | 2 ++ R/rset.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1dea223b..47fd7d42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ * Escalated the deprecation of the `gather()` method for `rset` objects to a hard deprecation. Use `tidyr::pivot_longer()` instead (#257). +* Changed resample "fingerprint" to hash the indices only rather than the entire resample result (including the data object). This is much faster and will still ensure the same resample for the same original data object (#259). + # rsample 0.1.0 * Fixed how `mc_cv()`, `initial_split()`, and `validation_split()` use the `prop` argument to first compute the assessment indices, rather than the analysis indices. This is a minor but **breaking change** in some situations; the previous implementation could cause an inconsistency in the sizes of the generated analysis and assessment sets when compared to how `prop` is documented to function (#217, @issactoast). diff --git a/R/rset.R b/R/rset.R index f5d11531..455c43ae 100644 --- a/R/rset.R +++ b/R/rset.R @@ -74,7 +74,7 @@ new_rset <- function(splits, ids, attrib = NULL, res <- add_class(res, cls = subclass) } - fingerprint <- list(map(splits$splits, "in_id"), map(splits$splits, "out_id")) + fingerprint <- map(res$splits, function(x) list(x$in_id, x$out_id)) fingerprint <- rlang::hash(fingerprint) attr(res, "fingerprint") <- fingerprint