diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 50f306e1c..cedc84de5 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,4 +1,5 @@ on: + workflow_dispatch: push: branches: - main @@ -35,7 +36,7 @@ jobs: - {os: 'windows-latest', tf: 'release', r: 'release'} - {os: 'macOS-latest' , tf: 'release', r: 'release'} - - {os: 'ubuntu-latest', tf: '2.13.0rc1', r: 'release'} + - {os: 'ubuntu-latest', tf: '2.13', r: 'release'} - {os: 'ubuntu-latest', tf: '2.12', r: 'release'} - {os: 'ubuntu-latest', tf: '2.11', r: 'release'} - {os: 'ubuntu-latest', tf: '2.10', r: 'release'} diff --git a/DESCRIPTION b/DESCRIPTION index f22fbc681..84926926c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ Depends: R (>= 3.4) Imports: generics (>= 0.0.1), - reticulate (> 1.22), + reticulate (>= 1.30.9000), tensorflow (>= 2.8.0), tfruns (>= 1.0), magrittr, @@ -51,3 +51,5 @@ Suggests: Roxygen: list(markdown = TRUE, r6 = FALSE) RoxygenNote: 7.2.3 VignetteBuilder: knitr +Remotes: + rstudio/reticulate diff --git a/NAMESPACE b/NAMESPACE index f073bc770..c8dfdb961 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -610,6 +610,7 @@ importFrom(reticulate,dict) importFrom(reticulate,import) importFrom(reticulate,import_builtins) importFrom(reticulate,import_from_path) +importFrom(reticulate,iter_next) importFrom(reticulate,iterate) importFrom(reticulate,py_call) importFrom(reticulate,py_capture_output) diff --git a/R/model-persistence.R b/R/model-persistence.R index 665bca1f3..015c408a3 100644 --- a/R/model-persistence.R +++ b/R/model-persistence.R @@ -342,7 +342,8 @@ model_from_yaml <- function(yaml, custom_objects = NULL) { #' @export serialize_model <- function(model, include_optimizer = TRUE) { - if (!inherits(model, "keras.engine.training.Model")) + if (!inherits(model, c("keras.engine.training.Model", + "keras.src.engine.training.Model"))) stop("You must pass a Keras model object to serialize_model") # write hdf5 file to temp file diff --git a/R/model.R b/R/model.R index 5468af6e9..f486bd507 100644 --- a/R/model.R +++ b/R/model.R @@ -555,7 +555,8 @@ resolve_input_data <- function(x, y = NULL) { args$x <- as_generator(x) } else if (inherits(x, "python.builtin.iterator")) { args$x <- x - } else if (inherits(x, "keras.utils.data_utils.Sequence")) { + } else if (inherits(x, c("keras.src.utils.data_utils.Sequence", + "keras.utils.data_utils.Sequence"))) { args$x <- x } else { if (!is.null(x)) @@ -576,7 +577,8 @@ resolve_validation_data <- function(validation_data) { args$validation_data <- as_generator(validation_data) else if (inherits(validation_data, "python.builtin.iterator")) args$validation_data <- validation_data - else if (inherits(validation_data, "keras.utils.data_utils.Sequence")) + else if (inherits(validation_data, c("keras.src.utils.data_utils.Sequence", + "keras.utils.data_utils.Sequence"))) args$validation_data <- validation_data else { args$validation_data <- keras_array(validation_data) @@ -593,32 +595,12 @@ resolve_main_thread_generators <- function(x, callback_type = "on_train_batch_be stop("Using generators that call R functions is not supported in TensorFlow 2.1 ", "Please upgrade your TF installation or downgrade to 2.0", call. = FALSE) - # we need a hack to make sure the generator is evaluated in the main thread. - python_path <- system.file("python", package = "keras") - tools <- reticulate::import_from_path("kerastools", path = python_path) - - # as_generator will return a tuple with 2 elements. - # (1) a python generator that just consumes - # a queue. - # (2) a function that evaluates the next element of the generator - # and adds to the queue. This function should be called in the main - # thread. - # we add a `on_train_batch_begin` to call this function. - o <- tools$model$as_generator(x) - - callback <- list(function(batch, logs) { - o[[2]]() - }) - names(callback) <- callback_type - - if (callback_type == "on_test_batch_begin") { - callback[[2]] <- callback[[1]] - names(callback)[[2]] <- "on_test_begin" - } - - callback <- do.call(callback_lambda, callback) - - list(generator = o[[1]], callback = callback) + # This used to house a mechanism for adding a keras callback that pumps + # the R generator from the main thread (e.g., from 'on_train_batch_begin'). + # This has since been fixed upstream, by adding a `prefetch` arg to + # reticulate::py_iterator() + # TODO: remove `resolve_main_thread_generators()` from package + list(generator = x, callback = NULL) } #' Train a Keras model @@ -1289,7 +1271,7 @@ as_generator.tensorflow.python.data.ops.dataset_ops.DatasetV2 <- function(x) { as_generator.function <- function(x) { python_path <- system.file("python", package = "keras") tools <- reticulate::import_from_path("kerastools", path = python_path) - iter <- reticulate::py_iterator(function() { + reticulate::py_iterator(function() { elem <- keras_array(x()) # deals with the case where the generator is used for prediction and only @@ -1298,8 +1280,8 @@ as_generator.function <- function(x) { elem[[2]] <- list() do.call(reticulate::tuple, elem) - }) - tools$generator$iter_generator(iter) + }, prefetch = 1L) + } as_generator.keras_preprocessing.sequence.TimeseriesGenerator <- function(x) { @@ -1354,6 +1336,9 @@ is_main_thread_generator.keras_preprocessing.sequence.TimeseriesGenerator <- fun FALSE } +is_main_thread_generator.keras.src.preprocessing.sequence.TimeseriesGenerator <- + is_main_thread_generator.keras_preprocessing.sequence.TimeseriesGenerator + is_tensorflow_dataset <- function(x) { inherits(x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") || inherits(x, "tensorflow.python.data.ops.dataset_ops.Dataset") diff --git a/R/package.R b/R/package.R index 78aeaa8bd..9de9134c8 100644 --- a/R/package.R +++ b/R/package.R @@ -154,7 +154,8 @@ keras <- NULL # let KerasTensor inherit all the S3 methods of tf.Tensor, but # KerasTensor methods take precedence. - if("keras.engine.keras_tensor.KerasTensor" %in% classes) + if(any(c("keras.src.engine.keras_tensor.KerasTensor", + "keras.engine.keras_tensor.KerasTensor") %in% classes)) classes <- unique(c("keras.engine.keras_tensor.KerasTensor", "tensorflow.tensor", classes)) diff --git a/R/preprocessing.R b/R/preprocessing.R index 4222d2781..a541b1511 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -615,12 +615,20 @@ image_array_save <- function(img, path, data_format = NULL, file_format = NULL, -#' Generate batches of image data with real-time data augmentation. The data will be -#' looped over (in batches). +#' [Deprecated] Generate batches of image data with real-time data augmentation. +#' The data will be looped over (in batches). +#' +#' Deprecated: `image_data_generator` is not +#' recommended for new code. Prefer loading images with +#' `image_dataset_from_directory` and transforming the output +#' TF Dataset with preprocessing layers. For more information, see the +#' tutorials for loading images and augmenting images, as well as the +#' preprocessing layer guide. #' #' @param featurewise_center Set input mean to 0 over the dataset, feature-wise. #' @param samplewise_center Boolean. Set each sample mean to 0. -#' @param featurewise_std_normalization Divide inputs by std of the dataset, feature-wise. +#' @param featurewise_std_normalization Divide inputs by std of the dataset, +#' feature-wise. #' @param samplewise_std_normalization Divide each input by its std. #' @param zca_whitening apply ZCA whitening. #' @param zca_epsilon Epsilon for ZCA whitening. Default is 1e-6. @@ -630,12 +638,11 @@ image_array_save <- function(img, path, data_format = NULL, file_format = NULL, #' @param brightness_range the range of brightness to apply #' @param shear_range shear intensity (shear angle in radians). #' @param zoom_range amount of zoom. if scalar z, zoom will be randomly picked -#' in the range `[1-z, 1+z]`. A sequence of two can be passed instead to select -#' this range. +#' in the range `[1-z, 1+z]`. A sequence of two can be passed instead to +#' select this range. #' @param channel_shift_range shift range for each channels. -#' @param fill_mode One of "constant", "nearest", "reflect" or "wrap". -#' Points outside the boundaries of the input are filled according to -#' the given mode: +#' @param fill_mode One of "constant", "nearest", "reflect" or "wrap". Points +#' outside the boundaries of the input are filled according to the given mode: #' - "constant": `kkkkkkkk|abcd|kkkkkkkk` (`cval=k`) #' - "nearest": `aaaaaaaa|abcd|dddddddd` #' - "reflect": `abcddcba|abcd|dcbaabcd` @@ -649,14 +656,15 @@ image_array_save <- function(img, path, data_format = NULL, file_format = NULL, #' other transformation). #' @param preprocessing_function function that will be implied on each input. #' The function will run before any other modification on it. The function -#' should take one argument: one image (tensor with rank 3), and should -#' output a tensor with the same shape. +#' should take one argument: one image (tensor with rank 3), and should output +#' a tensor with the same shape. #' @param data_format 'channels_first' or 'channels_last'. In 'channels_first' #' mode, the channels dimension (the depth) is at index 1, in 'channels_last' #' mode it is at index 3. It defaults to the `image_data_format` value found #' in your Keras config file at `~/.keras/keras.json`. If you never set it, #' then it will be "channels_last". -#' @param validation_split fraction of images reserved for validation (strictly between 0 and 1). +#' @param validation_split fraction of images reserved for validation (strictly +#' between 0 and 1). #' #' @export image_data_generator <- function(featurewise_center = FALSE, samplewise_center = FALSE, @@ -685,6 +693,7 @@ image_data_generator <- function(featurewise_center = FALSE, samplewise_center = preprocessing_function = preprocessing_function, data_format = data_format ) + if (keras_version() >= "2.0.4") args$zca_epsilon <- zca_epsilon if (keras_version() >= "2.1.5") { @@ -692,6 +701,11 @@ image_data_generator <- function(featurewise_center = FALSE, samplewise_center = args$validation_split <- validation_split } + if(is.function(preprocessing_function) && + !inherits(preprocessing_function, "python.builtin.object")) + args$preprocessing_function <- + reticulate::py_main_thread_func(preprocessing_function) + do.call(keras$preprocessing$image$ImageDataGenerator, args) } @@ -766,6 +780,7 @@ fit_image_data_generator <- function(object, x, augment = FALSE, rounds = 1, see #' #' @family image preprocessing #' +#' @importFrom reticulate iter_next #' @export flow_images_from_data <- function( x, y = NULL, generator = image_data_generator(), batch_size = 32, @@ -790,7 +805,21 @@ flow_images_from_data <- function( if (keras_version() >= "2.2.0") args$sample_weight <- sample_weight - do.call(generator$flow, args) + iterator <- do.call(generator$flow, args) + + if(!is.null(generator$preprocessing_function)) { + # user supplied a custom preprocessing function, which likely is an R + # function that must be called from the main thread. Wrap this in + # py_iterator(prefetch=1) to ensure we don't end in a deadlock. + iter_env <- new.env(parent = parent.env(environment())) # pkg namespace + iter_env$.iterator <- iterator + expr <- substitute(py_iterator(function() iter_next(iterator), prefetch=1L), + list(iterator = quote(.iterator))) + iterator <- eval(expr, iter_env) + } + + iterator + } #' Generates batches of data from images in a directory (with optional diff --git a/R/py-classes.R b/R/py-classes.R index 603039576..d05bd0b95 100644 --- a/R/py-classes.R +++ b/R/py-classes.R @@ -568,7 +568,7 @@ print.py_R6ClassGenerator <- function(x, ...) { #' @export `$.py_R6ClassGenerator` <- function(x, name) { if (identical(name, "new")) - return(self) + return(x) NextMethod() } diff --git a/R/zzz.R b/R/zzz.R index b5ca7a5f1..5bca627de 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -35,3 +35,5 @@ py_to_r_wrapper.keras.src.engine.training.Model <- py_to_r_wrapper.keras.engine. #' @export summary.keras.src.engine.training.Model <- summary.keras.engine.training.Model + +as_generator.keras.src.utils.data_utils.Sequence <- as_generator.keras_preprocessing.sequence.TimeseriesGenerator diff --git a/man/image_data_generator.Rd b/man/image_data_generator.Rd index 4f1ea69f7..3acf44f3f 100644 --- a/man/image_data_generator.Rd +++ b/man/image_data_generator.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/preprocessing.R \name{image_data_generator} \alias{image_data_generator} -\title{Generate batches of image data with real-time data augmentation. The data will be -looped over (in batches).} +\title{\link{Deprecated} Generate batches of image data with real-time data augmentation. +The data will be looped over (in batches).} \usage{ image_data_generator( featurewise_center = FALSE, @@ -34,7 +34,8 @@ image_data_generator( \item{samplewise_center}{Boolean. Set each sample mean to 0.} -\item{featurewise_std_normalization}{Divide inputs by std of the dataset, feature-wise.} +\item{featurewise_std_normalization}{Divide inputs by std of the dataset, +feature-wise.} \item{samplewise_std_normalization}{Divide each input by its std.} @@ -53,14 +54,13 @@ image_data_generator( \item{shear_range}{shear intensity (shear angle in radians).} \item{zoom_range}{amount of zoom. if scalar z, zoom will be randomly picked -in the range \verb{[1-z, 1+z]}. A sequence of two can be passed instead to select -this range.} +in the range \verb{[1-z, 1+z]}. A sequence of two can be passed instead to +select this range.} \item{channel_shift_range}{shift range for each channels.} -\item{fill_mode}{One of "constant", "nearest", "reflect" or "wrap". -Points outside the boundaries of the input are filled according to -the given mode: +\item{fill_mode}{One of "constant", "nearest", "reflect" or "wrap". Points +outside the boundaries of the input are filled according to the given mode: \itemize{ \item "constant": \code{kkkkkkkk|abcd|kkkkkkkk} (\code{cval=k}) \item "nearest": \code{aaaaaaaa|abcd|dddddddd} @@ -81,8 +81,8 @@ other transformation).} \item{preprocessing_function}{function that will be implied on each input. The function will run before any other modification on it. The function -should take one argument: one image (tensor with rank 3), and should -output a tensor with the same shape.} +should take one argument: one image (tensor with rank 3), and should output +a tensor with the same shape.} \item{data_format}{'channels_first' or 'channels_last'. In 'channels_first' mode, the channels dimension (the depth) is at index 1, in 'channels_last' @@ -90,9 +90,14 @@ mode it is at index 3. It defaults to the \code{image_data_format} value found in your Keras config file at \verb{~/.keras/keras.json}. If you never set it, then it will be "channels_last".} -\item{validation_split}{fraction of images reserved for validation (strictly between 0 and 1).} +\item{validation_split}{fraction of images reserved for validation (strictly +between 0 and 1).} } \description{ -Generate batches of image data with real-time data augmentation. The data will be -looped over (in batches). +Deprecated: \code{image_data_generator} is not +recommended for new code. Prefer loading images with +\code{image_dataset_from_directory} and transforming the output +TF Dataset with preprocessing layers. For more information, see the +tutorials for loading images and augmenting images, as well as the +preprocessing layer guide. } diff --git a/tests/testthat/helper-utils.R b/tests/testthat/helper-utils.R index f74392728..be1eebf7e 100644 --- a/tests/testthat/helper-utils.R +++ b/tests/testthat/helper-utils.R @@ -1,11 +1,18 @@ -Sys.setenv(TF_CPP_MIN_LOG_LEVEL = 1) +# Sys.setenv(TF_CPP_MIN_LOG_LEVEL = 1) # 0 = all messages are logged (default behavior) # 1 = INFO messages are not printed # 2 = INFO and WARNING messages are not printed # 3 = INFO, WARNING, and ERROR messages are not printed -if(reticulate::virtualenv_exists("r-tensorflow")) +if(!reticulate::py_available() && reticulate::virtualenv_exists("r-tensorflow")) reticulate::use_virtualenv("r-tensorflow") + +if(reticulate::py_available()) { + print(reticulate::py_config()) +} else { + setHook("reticulate.onPyInit", function() print(reticulate::py_config())) +} + # Sys.setenv(RETICULATE_PYTHON = "~/.local/share/r-miniconda/envs/tf-2.7-cpu/bin/python") # Sys.setenv(RETICULATE_PYTHON = "~/.local/share/r-miniconda/envs/tf-nightly-cpu/bin/python") # reticulate::use_condaenv("tf-2.5-cpu", required = TRUE) @@ -147,3 +154,4 @@ local_tf_device <- function(device_name = "CPU") { withr::defer_parent(device$`__exit__`()) invisible(device) } + diff --git a/tests/testthat/test-callbacks.R b/tests/testthat/test-callbacks.R index 10c3809ee..afb671292 100644 --- a/tests/testthat/test-callbacks.R +++ b/tests/testthat/test-callbacks.R @@ -31,7 +31,9 @@ if (tensorflow::tf_version() <= "2.1") test_callback("progbar_logger", callback_progbar_logger()) -test_callback("model_checkpoint", callback_model_checkpoint(tempfile(fileext = ".h5")), h5py = TRUE) +test_callback("model_checkpoint", + callback_model_checkpoint(tempfile(fileext = ".keras")), + h5py = TRUE) if(tf_version() >= "2.8") test_callback("backup_and_restore", callback_backup_and_restore(tempfile())) @@ -252,7 +254,7 @@ test_succeeds("on predict/evaluation callbacks", { warns <- capture_warnings( out <- capture_output( - pred <- predict(model, gen, callbacks = cc, steps = 1) + pred <- predict(model, gen, callbacks = cc, steps = 5) ) ) expect_warns_and_out(warns, out) diff --git a/tests/testthat/test-metrics.R b/tests/testthat/test-metrics.R index 2e223e949..9373c11d5 100644 --- a/tests/testthat/test-metrics.R +++ b/tests/testthat/test-metrics.R @@ -143,7 +143,8 @@ test_metric <- function(metric, ...) { m <- metric(...) expect_s3_class(m, c("keras.metrics.Metric", - 'keras.metrics.base_metric.Metric')) + 'keras.metrics.base_metric.Metric', + 'keras.src.metrics.base_metric.Metric')) define_model() %>% compile(loss = loss,