From 15c4e39f11604fc138988eb76a9f66bdad7c4603 Mon Sep 17 00:00:00 2001
From: ben-gready
Date: Wed, 25 Apr 2018 09:52:41 -0600
Subject: [PATCH 1/4] first version of S3 rds storr

---
 DESCRIPTION       |   5 +-
 R/driver_rds_s3.R | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 R/driver_rds_s3.R

diff --git a/DESCRIPTION b/DESCRIPTION
index bd889a2..4f95622 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: storr
 Title: Simple Key Value Stores
-Version: 1.1.3
+Version: 1.2.0
 Description: Creates and manages simple key-value stores. These
     can use a variety of approaches for storing the data. This
     package implements the base methods and support for file system, in-memory
@@ -22,6 +22,7 @@ Suggests:
     knitr,
     mockr,
     rbenchmark,
-    testthat (>= 1.0.0)
+    testthat (>= 1.0.0),
+    aws.s3
 VignetteBuilder: knitr
 RoxygenNote: 6.0.1

diff --git a/R/driver_rds_s3.R b/R/driver_rds_s3.R
new file mode 100644
index 0000000..0cfcee6
--- /dev/null
+++ b/R/driver_rds_s3.R
@@ -0,0 +1,305 @@
+##' Object cache driver that saves objects using R's native
+##' serialized file format (see \code{\link{saveRDS}}) on an S3 bucket.
+##'
+##' The \code{mangle_key} argument will run each key that is created
+##' through a "base 64" encoding. This means that keys that include
+##' symbols that are invalid on filesystems (e.g., "/", ":") will be
+##' replaced by harmless characters. The RFC 4648 dialect is used
+##' where "-" and "_" are used for characters 62 and 63 (this differs
+##' from most R base64 encoders). This mangling is designed to be
+##' transparent to the user -- the storr will appear to store things
+##' with unmangled keys but the names of the stored files will be
+##' different.
+##'
+##' Note that the \emph{namespace} is not mangled (at least not yet)
+##' so it needs to contain only characters that are valid in a filename.
+##'
+##' Because the actual files will be stored with mangled names it is
+##' not safe to use the same path for a storr with and without
+##' mangling. So once an rds storr has been created its "mangledness"
+##' is set. Using \code{mangle_key = NULL} uses whatever mangledness
+##' exists (or no mangledness if creating a new storr).
+##'
+##' @title rds object cache driver (AWS S3)
+##'
+##' @param bucket Name of the AWS S3 bucket in which the store will
+##'   be kept.
+##'
+##' @param path Path for the store within the bucket.
+##'
+##' @param compress Compress the generated file? This saves a small
+##'   amount of space for a reasonable amount of time.
+##'
+##' @param mangle_key Mangle keys? If TRUE, then the key is encoded
+##'   using base64 before saving to the filesystem. See Details.
+##'
+##' @param mangle_key_pad Logical indicating if the filenames created
+##'   when using \code{mangle_key} should also be "padded" with the
+##'   \code{=} character to make up a round number of bytes. Padding
+##'   is required to satisfy the document that describes base64
+##'   encoding (RFC 4648) but can cause problems in some applications
+##'   (see \href{https://github.com/richfitz/storr/issues/43}{this
+##'   issue}). The default is to not pad \emph{new} storr archives.
+##'   This should generally be safe to leave alone.
+##'
+##' @param hash_algorithm Name of the hash algorithm to use. Possible
+##'   values are "md5", "sha1", and others supported by
+##'   \code{\link{digest}}. If not given, then we will default to
+##'   "md5".
+##'
+##' @param default_namespace Default namespace (see
+##'   \code{\link{storr}}).
+##'
+##' @export
+##' @examples
+##'
+##' # Create an rds storr in R's temporary directory:
+##' st <- storr_rds(tempfile())
+##'
+##' # Store some data (10 random numbers against the key "foo")
+##' st$set("foo", runif(10))
+##' st$list()
+##'
+##' # And retrieve the data:
+##' st$get("foo")
+##'
+##' # Keys that are not valid filenames will cause issues. This will
+##' # cause an error:
+##' \dontrun{
+##' st$set("foo/bar", letters)
+##' }
+##'
+##' # The solution to this is to "mangle" the key names. Storr can do
+##' # this for you:
+##' st2 <- storr_rds(tempfile(), mangle_key = TRUE)
+##' st2$set("foo/bar", letters)
+##' st2$list()
+##' st2$get("foo/bar")
+##'
+##' # Behind the scenes, storr is safely encoding the filenames with base64:
+##' dir(file.path(st2$driver$path, "keys", "objects"))
+##'
+##' # Clean up the two storrs:
+##' st$destroy()
+##' st2$destroy()
+storr_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL,
+                  mangle_key_pad = NULL, hash_algorithm = NULL,
+                  default_namespace = "objects") {
+  storr(driver_rds_s3(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm),
+        default_namespace)
+}
+
+##' @export
+##' @rdname storr_rds
+driver_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL,
+                  mangle_key_pad = NULL, hash_algorithm = NULL) {
+  R6_driver_rds_s3$new(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm)
+}
+
+R6_driver_rds_s3 <- R6::R6Class(
+  "driver_rds_s3",
+  public = list(
+    ## TODO: things like hash_algorithm: do they belong in traits?
+    ## This needs sorting before anyone writes their own driver!
+    bucket = NULL,
+    path = NULL,
+    compress = NULL,
+    mangle_key = NULL,
+    mangle_key_pad = NULL,
+    hash_algorithm = NULL,
+    traits = list(accept = "raw"),
+
+    initialize = function(bucket, path, compress, mangle_key, mangle_key_pad,
+                          hash_algorithm) {
+
+      is_new <- !s3_object_exists(bucket = bucket, path = file.path(path, "config"))
+      aws.s3::put_folder(folder = path, bucket = bucket)
+      aws.s3::put_folder(folder = file.path(path, "data"), bucket = bucket)
+      aws.s3::put_folder(folder = file.path(path, "keys"), bucket = bucket)
+      aws.s3::put_folder(folder = file.path(path, "config"), bucket = bucket)
+      self$bucket <- bucket
+      self$path <- path
+
+      ## This is a bit of complicated dancing around to maintain
+      ## backward compatibility while allowing better defaults in
+      ## future versions. I'm writing out a version number here that
+      ## future versions of driver_rds can use to patch, warn or
+      ## change behaviour with older versions of the storr.
+      if (!is_new && !s3_object_exists(path = driver_rds_s3_config_file(path, "version"), bucket = bucket)) {
+        s3_write_if_missing("1.0.1", bucket = bucket, path = driver_rds_s3_config_file(path, "version"))
+        s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "mangle_key_pad"))
+        s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "compress"))
+        s3_write_if_missing("md5", bucket = bucket, path = driver_rds_s3_config_file(path, "hash_algorithm"))
+      }
+      ## Then write out the version number:
+      s3_write_if_missing(as.character(packageVersion("storr")),
+                  bucket = bucket,
+                          path = driver_rds_s3_config_file(path, "version"))
+
+      if (!is.null(mangle_key)) {
+        assert_scalar_logical(mangle_key)
+      }
+      self$mangle_key <- driver_rds_s3_config(bucket, path, "mangle_key", mangle_key,
+                  FALSE, TRUE)
+
+      if (!is.null(mangle_key_pad)) {
+        assert_scalar_logical(mangle_key_pad)
+      }
+      self$mangle_key_pad <-
+        driver_rds_s3_config(bucket, path, "mangle_key_pad", mangle_key_pad,
+                  FALSE, TRUE)
+
+      if (!is.null(compress)) {
+        assert_scalar_logical(compress)
+      }
+      self$compress <- driver_rds_s3_config(bucket, path, "compress", compress,
+                  TRUE, FALSE)
+
+      if (!is.null(hash_algorithm)) {
+        assert_scalar_character(hash_algorithm)
+      }
+      self$hash_algorithm <- driver_rds_s3_config(bucket, path, "hash_algorithm",
+                  hash_algorithm, "md5", TRUE)
+    },
+
+    type = function() {
+      "rds_s3"
+    },
+    destroy = function() {
+      aws.s3::delete_object(object = self$path, bucket = self$bucket)
+      warning("not fully implemented")
+    },
+
+    get_hash = function(key, namespace) {
+      s3_readLines(path = self$name_key(key, namespace), bucket = self$bucket)
+    },
+    set_hash = function(key, namespace, hash) {
+      ## No directories need creating on S3; keys are written directly.
+      s3_writeLines(text = hash, path = self$name_key(key, namespace), bucket = self$bucket) #*** should be making use of (or making an equivalent version of) the write_lines function within the storr package here (deletes file if the write fails)
+    },
+    get_object = function(hash) {
+      aws.s3::s3readRDS(object = self$name_hash(hash), bucket = self$bucket)
+    },
+    set_object = function(hash, value) {
+      ## NOTE: this takes advantage of having the serialized value
+      ## already and avoids serialising twice.
+      assert_raw(value)
+      aws.s3::s3write_using(x = value, FUN = function(v, p) write_serialized_rds(v, p, self$compress), object = self$name_hash(hash), bucket = self$bucket)
+    },
+
+    exists_hash = function(key, namespace) {
+      s3_object_exists(self$name_key(key, namespace), bucket = self$bucket)
+    },
+    exists_object = function(hash) {
+      s3_object_exists(self$name_hash(hash), bucket = self$bucket)
+    },
+
+    del_hash = function(key, namespace) {
+      #file_remove(self$name_key(key, namespace))
+      warning("not implemented")
+    },
+    del_object = function(hash) {
+      #file_remove(self$name_hash(hash))
+      warning("not implemented")
+    },
+
+    list_hashes = function() {
+      sub("\\.rds$", "", s3_list_dir(bucket = self$bucket, path = file.path(self$path, "data")))
+    },
+    list_namespaces = function() {
+      s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys"))
+    },
+    list_keys = function(namespace) {
+      ret <- s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys", namespace))
+      if (self$mangle_key) decode64(ret, TRUE) else ret
+    },
+
+    name_hash = function(hash) {
+      if (length(hash) > 0L) {
+        file.path(self$path, "data", paste0(hash, ".rds"))
+      } else {
+        character(0)
+      }
+    },
+    name_key = function(key, namespace) {
+      if (self$mangle_key) {
+        key <- encode64(key, pad = self$mangle_key_pad)
+      }
+      file.path(self$path, "keys", namespace, key)
+    }
+  ))
+
+## This attempts to check that we are connecting to a storr of
+## appropriate mangledness. There's a lot of logic here, but it's
+## actually pretty simple in practice and tested in test-driver-rds.R:
+##
+## if mangle_key is NULL we take the mangledness of the
+## existing storr or set up for no mangling.
+##
+## if mangle_key is not NULL then it is an error if it differs
+## from the existing storr's mangledness.
+driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree) {
+  path_opt <- driver_rds_s3_config_file(path, name)
+
+  load_value <- function() {
+    if (s3_object_exists(bucket, path_opt)) {
+      value <- s3_readLines(path_opt, bucket)
+      storage.mode(value) <- storage.mode(default)
+    } else {
+      value <- default
+    }
+    value
+  }
+
+  if (is.null(value)) {
+    value <- load_value()
+  } else if (must_agree && s3_object_exists(bucket = bucket, path = path_opt)) {
+    value_prev <- load_value()
+    if (value != value_prev) {
+      stop(ConfigError(name, value_prev, value))
+    }
+  }
+  if (!s3_object_exists(bucket = bucket, path = path_opt)) {
+    s3_writeLines(text = as.character(value), path = path_opt, bucket = bucket)
+  }
+
+  value
+}
+
+driver_rds_s3_config_file <- function(path, key) {
+  file.path(path, "config", key)
+}
+
+s3_write_if_missing <- function(value, bucket, path) {
+  if (!s3_object_exists(bucket, path)) {
+    s3_writeLines(text = value, path = path, bucket = bucket)
+  }
+}
+
+## S3 Helper functions
+
+s3_file_remove <- function(path, bucket) {
+  exists <- s3_object_exists(bucket, path)
+  if (any(exists)) {
+    aws.s3::delete_object(object = path[exists], bucket = bucket)
+  }
+  invisible(exists)
+}
+
+s3_writeLines <- function(text, path, bucket){
+  aws.s3::s3write_using(x = text, FUN = writeLines, object = path, bucket = bucket)
+}
+
+s3_readLines <- function(path, bucket){
+  aws.s3::s3read_using(FUN = readLines, object = path, bucket = bucket)
+}
+
+s3_object_exists <- function(bucket, path){
+  suppressMessages(aws.s3::head_object(object = path, bucket = bucket)[1])
+}
+
+s3_list_dir <- function(bucket, path){
+  if(substr(path, nchar(path), nchar(path)) != "/") path = paste0(path, "/")
+  files_table <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)
+  keys <- files_table[files_table$Size > 0,]$Key
+  files <- gsub(pattern = path, replacement = "", x = keys)
+  ## The first element of each split name is the file or directory
+  ## directly within path; take these unique so that directories only
+  ## appear once.
+  split_names <- strsplit(files, "/")
+  unique(unlist(lapply(split_names, function(x) x[1])))
+}
\ No newline at end of file

From 82a79183a5949225db7ab77f01786953ecdc9b46 Mon Sep 17 00:00:00 2001
From: ben-gready
Date: Wed, 25 Apr 2018 10:29:15 -0600
Subject: [PATCH 2/4] update NAMESPACE

---
 NAMESPACE           |  2 ++
 man/storr_rds.Rd    |  6 ++-
 man/storr_rds_s3.Rd | 94 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 man/storr_rds_s3.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c3a397b..0d73c3f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(decode64)
 export(driver_dbi)
 export(driver_environment)
 export(driver_rds)
+export(driver_rds_s3)
 export(driver_redis_api)
 export(encode64)
 export(fetch_hook_read)
@@ -14,6 +15,7 @@ export(storr_dbi)
 export(storr_environment)
 export(storr_external)
 export(storr_rds)
+export(storr_rds_s3)
 export(storr_redis_api)
 export(test_driver)
 importFrom(R6,R6Class)

diff --git a/man/storr_rds.Rd b/man/storr_rds.Rd
index 84f468f..baaf592 100644
--- a/man/storr_rds.Rd
+++ b/man/storr_rds.Rd
@@ -1,8 +1,9 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/driver_rds.R
+% Please edit documentation in R/driver_rds.R, R/driver_rds_s3.R
 \name{storr_rds}
 \alias{storr_rds}
 \alias{driver_rds}
+\alias{driver_rds_s3}
 \title{rds object cache driver}
 \usage{
 storr_rds(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL,
@@ -9,7 +10,10 @@ storr_rds(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL,
   hash_algorithm = NULL, default_namespace = "objects")
 
 driver_rds(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL,
   hash_algorithm = NULL)
+
+driver_rds_s3(bucket, path, compress = NULL, mangle_key = NULL,
+  mangle_key_pad = NULL, hash_algorithm = NULL)
 }
 \arguments{
 \item{path}{Path for the store. \code{tempdir()} is a good choice

diff --git a/man/storr_rds_s3.Rd b/man/storr_rds_s3.Rd
new file mode 100644
index 0000000..1a9a789
--- /dev/null
+++ b/man/storr_rds_s3.Rd
@@ -0,0 +1,94 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/driver_rds_s3.R
+\name{storr_rds_s3}
+\alias{storr_rds_s3}
+\title{rds object cache driver (AWS S3)}
+\usage{
+storr_rds_s3(bucket, path, compress = NULL, mangle_key = NULL,
+  mangle_key_pad = NULL, hash_algorithm = NULL,
+  default_namespace = "objects")
+}
+\arguments{
+\item{bucket}{Name of the AWS S3 bucket in which the store will be
+kept.}
+
+\item{path}{Path for the store within the bucket.}
+
+\item{compress}{Compress the generated file? This saves a small
+amount of space for a reasonable amount of time.}
+
+\item{mangle_key}{Mangle keys? If TRUE, then the key is encoded
+using base64 before saving to the filesystem. See Details.}
+
+\item{mangle_key_pad}{Logical indicating if the filenames created
+when using \code{mangle_key} should also be "padded" with the
+\code{=} character to make up a round number of bytes. Padding
+is required to satisfy the document that describes base64
+encoding (RFC 4648) but can cause problems in some applications
+(see \href{https://github.com/richfitz/storr/issues/43}{this
+issue}). The default is to not pad \emph{new} storr archives.
+This should generally be safe to leave alone.}
+
+\item{hash_algorithm}{Name of the hash algorithm to use. Possible
+values are "md5", "sha1", and others supported by
+\code{\link{digest}}. If not given, then we will default to
+"md5".}
+
+\item{default_namespace}{Default namespace (see
+\code{\link{storr}}).}
+}
+\description{
+Object cache driver that saves objects using R's native
+serialized file format (see \code{\link{saveRDS}}) on an S3 bucket.
+}
+\details{
+The \code{mangle_key} argument will run each key that is created
+through a "base 64" encoding. This means that keys that include
+symbols that are invalid on filesystems (e.g., "/", ":") will be
+replaced by harmless characters. The RFC 4648 dialect is used
+where "-" and "_" are used for characters 62 and 63 (this differs
+from most R base64 encoders). This mangling is designed to be
+transparent to the user -- the storr will appear to store things
+with unmangled keys but the names of the stored files will be
+different.
+
+Note that the \emph{namespace} is not mangled (at least not yet)
+so it needs to contain only characters that are valid in a filename.
+
+Because the actual files will be stored with mangled names it is
+not safe to use the same path for a storr with and without
+mangling. So once an rds storr has been created its "mangledness"
+is set. Using \code{mangle_key = NULL} uses whatever mangledness
+exists (or no mangledness if creating a new storr).
+}
+\examples{
+
+# Create an rds storr in R's temporary directory:
+st <- storr_rds(tempfile())
+
+# Store some data (10 random numbers against the key "foo")
+st$set("foo", runif(10))
+st$list()
+
+# And retrieve the data:
+st$get("foo")
+
+# Keys that are not valid filenames will cause issues. This will
+# cause an error:
+\dontrun{
+st$set("foo/bar", letters)
+}
+
+# The solution to this is to "mangle" the key names. Storr can do
+# this for you:
+st2 <- storr_rds(tempfile(), mangle_key = TRUE)
+st2$set("foo/bar", letters)
+st2$list()
+st2$get("foo/bar")
+
+# Behind the scenes, storr is safely encoding the filenames with base64:
+dir(file.path(st2$driver$path, "keys", "objects"))
+
+# Clean up the two storrs:
+st$destroy()
+st2$destroy()
+}

From fc7bba3a44918e19bd6e4afb37d72fe347b1e0c6 Mon Sep 17 00:00:00 2001
From: ben-gready
Date: Wed, 25 Apr 2018 22:09:23 -0600
Subject: [PATCH 3/4] ability to delete

---
 R/driver_rds_s3.R | 94 +++++++++++++++++++++++++++++++----------------
 1 file changed, 62 insertions(+), 32 deletions(-)

diff --git a/R/driver_rds_s3.R b/R/driver_rds_s3.R
index 0cfcee6..fe20dc4 100644
--- a/R/driver_rds_s3.R
+++ b/R/driver_rds_s3.R
@@ -80,8 +80,8 @@
 ##' st$destroy()
 ##' st2$destroy()
 storr_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL,
-                  mangle_key_pad = NULL, hash_algorithm = NULL,
-                  default_namespace = "objects") {
+                         mangle_key_pad = NULL, hash_algorithm = NULL,
+                         default_namespace = "objects") {
   storr(driver_rds_s3(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm),
         default_namespace)
 }
@@ -89,7 +89,7 @@ storr_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL,
 ##' @export
 ##' @rdname storr_rds
 driver_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL,
-                  mangle_key_pad = NULL, hash_algorithm = NULL) {
+                          mangle_key_pad = NULL, hash_algorithm = NULL) {
   R6_driver_rds_s3$new(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm)
 }
 
@@ -105,10 +105,10 @@ R6_driver_rds_s3 <- R6::R6Class(
     mangle_key_pad = NULL,
     hash_algorithm = NULL,
     traits = list(accept = "raw"),
-    
+
     initialize = function(bucket, path, compress, mangle_key, mangle_key_pad,
                           hash_algorithm) {
-      
+
       is_new <- !s3_object_exists(bucket = bucket, path = file.path(path, "config"))
       aws.s3::put_folder(folder = path, bucket = bucket)
      aws.s3::put_folder(folder = file.path(path, "data"), bucket = bucket)
@@ -116,7 +116,7 @@ R6_driver_rds_s3 <- R6::R6Class(
       aws.s3::put_folder(folder = file.path(path, "config"), bucket = bucket)
       self$bucket <- bucket
       self$path <- path
-      
+
       ## This is a bit of complicated dancing around to maintain
       ## backward compatibility while allowing better defaults in
       ## future versions. I'm writing out a version number here that
@@ -130,49 +130,48 @@ R6_driver_rds_s3 <- R6::R6Class(
       }
       ## Then write out the version number:
       s3_write_if_missing(as.character(packageVersion("storr")),
-                  bucket = bucket,
+                          bucket = bucket,
                           path = driver_rds_s3_config_file(path, "version"))
-      
+
       if (!is.null(mangle_key)) {
         assert_scalar_logical(mangle_key)
       }
       self$mangle_key <- driver_rds_s3_config(bucket, path, "mangle_key", mangle_key,
-                  FALSE, TRUE)
-      
+                                              FALSE, TRUE)
+
       if (!is.null(mangle_key_pad)) {
         assert_scalar_logical(mangle_key_pad)
       }
       self$mangle_key_pad <-
         driver_rds_s3_config(bucket, path, "mangle_key_pad", mangle_key_pad,
-                  FALSE, TRUE)
-      
+                             FALSE, TRUE)
+
       if (!is.null(compress)) {
         assert_scalar_logical(compress)
       }
       self$compress <- driver_rds_s3_config(bucket, path, "compress", compress,
-                  TRUE, FALSE)
-      
+                                            TRUE, FALSE)
+
       if (!is.null(hash_algorithm)) {
         assert_scalar_character(hash_algorithm)
       }
       self$hash_algorithm <- driver_rds_s3_config(bucket, path, "hash_algorithm",
-                  hash_algorithm, "md5", TRUE)
+                                                  hash_algorithm, "md5", TRUE)
     },
-    
+
     type = function() {
       "rds_s3"
     },
     destroy = function() {
-      aws.s3::delete_object(object = self$path, bucket = self$bucket)
-      warning("not fully implemented")
+      s3_delete_recursive(bucket = self$bucket, path = self$path)
     },
-    
+
     get_hash = function(key, namespace) {
       s3_readLines(path = self$name_key(key, namespace), bucket = self$bucket)
     },
     set_hash = function(key, namespace, hash) {
       ## No directories need creating on S3; keys are written directly.
-      s3_writeLines(text = hash, path = self$name_key(key, namespace), bucket = self$bucket) #*** should be making use of (or making an equivalent version of) the write_lines function within the storr package here (deletes file if the write fails)
+      s3_writeLines(text = hash, path = self$name_key(key, namespace), bucket = self$bucket) #*** should be making use of (or making an equivalent version of) the write_lines function within the storr package here (I think it deletes the file if the write fails)
     },
     get_object = function(hash) {
       aws.s3::s3readRDS(object = self$name_hash(hash), bucket = self$bucket)
@@ -183,23 +182,29 @@ R6_driver_rds_s3 <- R6::R6Class(
       assert_raw(value)
       aws.s3::s3write_using(x = value, FUN = function(v, p) write_serialized_rds(v, p, self$compress), object = self$name_hash(hash), bucket = self$bucket)
     },
-    
+
     exists_hash = function(key, namespace) {
       s3_object_exists(self$name_key(key, namespace), bucket = self$bucket)
     },
     exists_object = function(hash) {
       s3_object_exists(self$name_hash(hash), bucket = self$bucket)
     },
-    
+
     del_hash = function(key, namespace) {
-      #file_remove(self$name_key(key, namespace))
-      warning("not implemented")
+      #s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace))
+      ## The commented line above deletes just one file (S3 key), but
+      ## it will throw an error if the path we are trying to delete
+      ## looks like a directory. S3 has no actual notion of
+      ## directories -- we just fake them using "/" -- so it is
+      ## possible to get into a muddle. To play it safe, the line
+      ## below deletes just the path given, throwing a warning if the
+      ## path does look like a directory. It could also be changed to
+      ## if_dir = "del_recursive" to delete the whole "directory" with
+      ## a warning. This may never actually show up as an issue; it
+      ## is just a note. If it occurs a lot for some reason, the
+      ## behaviour of this delete may need changing.
+      s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace), if_dir = "del_only_key")
     },
     del_object = function(hash) {
-      #file_remove(self$name_hash(hash))
-      warning("not implemented")
+      ## See the note in del_hash, which also applies here.
+      s3_delete_file(bucket = self$bucket, path = self$name_hash(hash), if_dir = "del_only_key")
     },
-    
+
     list_hashes = function() {
       sub("\\.rds$", "", s3_list_dir(bucket = self$bucket, path = file.path(self$path, "data")))
     },
@@ -210,7 +215,7 @@ R6_driver_rds_s3 <- R6::R6Class(
       ret <- s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys", namespace))
       if (self$mangle_key) decode64(ret, TRUE) else ret
     },
-    
+
     name_hash = function(hash) {
       if (length(hash) > 0L) {
         file.path(self$path, "data", paste0(hash, ".rds"))
@@ -237,7 +242,7 @@ R6_driver_rds_s3 <- R6::R6Class(
 ## from the existing storr's mangledness.
 driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree) {
   path_opt <- driver_rds_s3_config_file(path, name)
-  
+
   load_value <- function() {
     if (s3_object_exists(bucket, path_opt)) {
       value <- s3_readLines(path_opt, bucket)
@@ -247,7 +252,7 @@ driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree)
     }
     value
   }
-  
+
   if (is.null(value)) {
     value <- load_value()
   } else if (must_agree && s3_object_exists(bucket = bucket, path = path_opt)) {
@@ -259,7 +264,7 @@ driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree)
   if (!s3_object_exists(bucket = bucket, path = path_opt)) {
     s3_writeLines(text = as.character(value), path = path_opt, bucket = bucket)
   }
-  
+
   value
 }
 
@@ -296,10 +301,35 @@ s3_object_exists <- function(bucket, path){
 }
 
 s3_list_dir <- function(bucket, path){
   if(substr(path, nchar(path), nchar(path)) != "/") path = paste0(path, "/")
   files_table <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)
   keys <- files_table[files_table$Size > 0,]$Key
   files <- gsub(pattern = path, replacement = "", x = keys)
   ## The first element of each split name is the file or directory
   ## directly within path; take these unique so that directories only
   ## appear once.
   split_names <- strsplit(files, "/")
   unique(unlist(lapply(split_names, function(x) x[1])))
-}
\ No newline at end of file
+}
+
+s3_delete_recursive <- function(bucket, path, force = FALSE){
+  files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]]
+  invisible(lapply(files, function(x) aws.s3::delete_object(x, bucket)))
+}
+
+s3_delete_file <- function(bucket, path, if_dir = c("stop", "del_only_key", "del_recursive")){
+  files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]]
+  if(length(files) > 1){
+    if_dir <- match.arg(if_dir) # only needed if we get inside this branch
+    if(if_dir == "stop"){
+      stop("You are trying to delete one file, but the path looks like a directory")
+    }
+    if(if_dir == "del_only_key"){
+      warning("You are trying to delete one file, but the path looks like a directory. Deleted only the specific key you requested")
+      invisible(aws.s3::delete_object(object = path, bucket = bucket))
+    }
+    if(if_dir == "del_recursive"){
+      warning("You are trying to delete one file, but the path looks like a directory. Deleting everything below the path you specified")
+      s3_delete_recursive(bucket, path)
+    }
+  } else {
+    invisible(aws.s3::delete_object(object = path, bucket = bucket))
+  }
+}
\ No newline at end of file

From 0afafc55a4cc1d63b231d3d6dff947851a12a964 Mon Sep 17 00:00:00 2001
From: ben-gready
Date: Wed, 25 Apr 2018 22:18:59 -0600
Subject: [PATCH 4/4] added an explanation in README.md, which should be moved
 if merged to real repo

---
 README.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b039443..41d6a4c 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Simple object cacher for R. `storr` acts as a very simple key-value store (supp
 * Fetch from an external source (e.g. website) if a key is not found locally
 * Pluggable storage backends - currently
   - environment (memory)
-  - rds (disk)
+  - rds (disk, AWS S3)
   - [DBI](https://cran.r-project.org/package=DBI) though which you can use:
     * [SQLite](https://sqlite.org) (via [RSQLite](https://cran.r-project.org/package=RSQLite))
     * [Postgres](https://postgresql.org) (via
@@ -43,3 +43,15 @@ remotes::install_github("richfitz/storr@develop", upgrade = FALSE)
 
 * [storr](https://richfitz.github.io/storr/articles/storr.html) `vignette("storr")` outlines basic use and core implementation details.
 * [external](https://richfitz.github.io/storr/articles/external.html) `vignette("external")` shows how to use storr to cache external resources such as files, web resources, etc, using the `storr_external` object.
+
+## RDS on AWS S3
+
+*This section should be moved elsewhere if the real storr package decides to merge this work*
+
+To use an S3 bucket behind an rds store, you must have an AWS access key id and a matching secret access key. These may be generated from the [AWS console](https://docs.aws.amazon.com/general/latest/gr/managing-aws-access-keys.html). There are a number of ways to make these access keys available, detailed in the [aws.s3 package](https://github.com/cloudyr/aws.s3) documentation. We recommend placing the credentials in `~/.aws/credentials` in the following format:
+
+```
+[default]
+aws_access_key_id = your_id
+aws_secret_access_key = your_secret_key
+```
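+
+Once credentials are in place, usage mirrors the plain rds driver; only the
+constructor takes the extra bucket argument. A minimal sketch (the bucket name
+`my-bucket` and the path `storr/demo` below are placeholders -- substitute a
+bucket your credentials can write to):
+
+```r
+library(storr)
+
+# Create (or reconnect to) an rds store kept under "storr/demo" in the bucket
+st <- storr_rds_s3("my-bucket", "storr/demo")
+
+# Store a value against a key, retrieve it, and list the keys present
+st$set("mykey", mtcars)
+head(st$get("mykey"))
+st$list()
+
+# Destroy the store, deleting everything under "storr/demo" in the bucket
+st$destroy()
+```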