Merge branch 'master' into keras.layers.activation-defaults-to

# Conflicts: # keras/layers/activation/leaky_relu.py # keras/layers/activation/relu.py # keras/layers/activation/softmax.py
keras-team · Apr 23, 2023 · d0ca2ee · d0ca2ee
2 parents 46449dc + 602cdf1
commit d0ca2ee
Show file tree

Hide file tree

Showing 113 changed files with 1,508 additions and 720 deletions.
diff --git a/keras/BUILD b/keras/BUILD
@@ -291,6 +291,7 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 6,
     tags = [
+        "no_pip",  # TODO(b/276923757)
         "no_tfrt",  # TODO(b/179690526)
         "notsan",
     ],

diff --git a/keras/activations.py b/keras/activations.py
@@ -573,8 +573,8 @@ def serialize(activation, use_legacy_format=False):
             f"Unknown activation function '{activation}' cannot be "
             "serialized due to invalid function name. Make sure to use "
             "an activation name that matches the references defined in "
-            "activations.py or use `@keras.utils.register_keras_serializable` "
-            "for any custom activations. "
+            "activations.py or use `@keras.utils.register_keras_serializable()`"
+            " to register any custom activations. "
             f"config={fn_config}"
         )
     if not isinstance(activation, types.FunctionType):

diff --git a/keras/api/golden/v2/tensorflow.keras.utils.-timed-thread.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.-timed-thread.pbtxt
@@ -0,0 +1,25 @@
+path: "tensorflow.keras.utils.TimedThread"
+tf_class {
+  is_instance: "<class \'keras.utils.timed_threads.TimedThread\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'interval\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "is_alive"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "on_interval"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "start"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "stop"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/keras/api/golden/v2/tensorflow.keras.utils.pbtxt b/keras/api/golden/v2/tensorflow.keras.utils.pbtxt
@@ -32,6 +32,10 @@ tf_module {
     name: "SidecarEvaluator"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "TimedThread"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "custom_object_scope"
     mtype: "<type \'type\'>"

diff --git a/keras/api/tests/BUILD b/keras/api/tests/BUILD
@@ -33,7 +33,6 @@ tf_py_test(
         "//:expect_six_installed",
         "//third_party/py/tensorflow",
         "//third_party/tensorflow/python:lib",
-        "//third_party/tensorflow/python:platform",
         "//third_party/tensorflow/tools/api/lib:python_object_to_proto_visitor",
         "//third_party/tensorflow/tools/common:public_api",
         "//third_party/tensorflow/tools/common:traverse",

diff --git a/keras/api/tests/api_compatibility_test.py b/keras/api/tests/api_compatibility_test.py
@@ -39,7 +39,6 @@
 from google.protobuf import message
 from google.protobuf import text_format
 from tensorflow.python.lib.io import file_io
-from tensorflow.python.platform import tf_logging as logging
 from tensorflow.tools.api.lib import api_objects_pb2
 from tensorflow.tools.api.lib import (
     python_object_to_proto_visitor,
@@ -262,14 +261,14 @@ def _AssertProtoDictEquals(
         # If diffs are found, handle them based on flags.
         if diffs:
             diff_count = len(diffs)
-            logging.error(self._test_readme_message)
-            logging.error(
+            tf.compat.v1.logging.error(self._test_readme_message)
+            tf.compat.v1.logging.error(
                 "%d differences found between API and golden.", diff_count
             )
 
             if update_goldens:
                 # Write files if requested.
-                logging.warning(self._update_golden_warning)
+                tf.compat.v1.logging.warning(self._update_golden_warning)
 
                 # If the keys are only in expected, some objects are deleted.
                 # Remove files.
@@ -288,15 +287,17 @@ def _AssertProtoDictEquals(
             else:
                 # Include the actual differences to help debugging.
                 for d, verbose_d in zip(diffs, verbose_diffs):
-                    logging.error("    %s", d)
-                    logging.error("    %s", verbose_d)
+                    tf.compat.v1.logging.error("    %s", d)
+                    tf.compat.v1.logging.error("    %s", verbose_d)
                 # Fail if we cannot fix the test by updating goldens.
                 self.fail(
                     "%d differences found between API and golden." % diff_count
                 )
 
         else:
-            logging.info("No differences found between API and golden.")
+            tf.compat.v1.logging.info(
+                "No differences found between API and golden."
+            )
 
     def _checkBackwardsCompatibility(
         self,

diff --git a/keras/applications/imagenet_utils.py b/keras/applications/imagenet_utils.py
@@ -56,10 +56,10 @@
       The preprocessed data are written over the input data
       if the data types are compatible. To avoid this
       behaviour, `numpy.copy(x)` can be used.
-    data_format: Optional data format of the image tensor/array. Defaults to
-      None, in which case the global setting
-      `tf.keras.backend.image_data_format()` is used (unless you changed it,
-      it defaults to "channels_last").{mode}
+    data_format: Optional data format of the image tensor/array. `None` means
+      the global setting `tf.keras.backend.image_data_format()` is used
+      (unless you changed it, it uses "channels_last").
+      Defaults to `None`.{mode}
 
   Returns:
       Preprocessed `numpy.array` or a `tf.Tensor` with type `float32`.
@@ -70,7 +70,7 @@
   """
 
 PREPROCESS_INPUT_MODE_DOC = """
-    mode: One of "caffe", "tf" or "torch". Defaults to "caffe".
+    mode: One of "caffe", "tf" or "torch".
       - caffe: will convert the images from RGB to BGR,
           then will zero-center each color channel with
           respect to the ImageNet dataset,
@@ -80,6 +80,7 @@
       - torch: will scale pixels between 0 and 1 and then
           will normalize each channel with respect to the
           ImageNet dataset.
+      Defaults to "caffe".
   """
 
 PREPROCESS_INPUT_DEFAULT_ERROR_DOC = """

diff --git a/keras/applications/inception_v3.py b/keras/applications/inception_v3.py
@@ -82,13 +82,13 @@ def InceptionV3(
 
     Args:
       include_top: Boolean, whether to include the fully-connected
-        layer at the top, as the last layer of the network. Default to `True`.
+        layer at the top, as the last layer of the network. Defaults to `True`.
       weights: One of `None` (random initialization),
         `imagenet` (pre-training on ImageNet),
-        or the path to the weights file to be loaded. Default to `imagenet`.
+        or the path to the weights file to be loaded. Defaults to `imagenet`.
       input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
         to use as image input for the model. `input_tensor` is useful for
-        sharing inputs between multiple different networks. Default to None.
+        sharing inputs between multiple different networks. Defaults to `None`.
       input_shape: Optional shape tuple, only to be specified
         if `include_top` is False (otherwise the input shape
         has to be `(299, 299, 3)` (with `channels_last` data format)
@@ -108,7 +108,7 @@ def InceptionV3(
         - `max` means that global max pooling will be applied.
       classes: optional number of classes to classify images
         into, only to be specified if `include_top` is True, and
-        if no `weights` argument is specified. Default to 1000.
+        if no `weights` argument is specified. Defaults to 1000.
       classifier_activation: A `str` or callable. The activation function to use
         on the "top" layer. Ignored unless `include_top=True`. Set
         `classifier_activation=None` to return the logits of the "top" layer.

diff --git a/keras/applications/mobilenet.py b/keras/applications/mobilenet.py
@@ -124,25 +124,26 @@ def MobileNet(
         `channels_last` data format) or (3, 224, 224) (with `channels_first`
         data format). It should have exactly 3 inputs channels, and width and
         height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
-        valid value. Default to `None`.
+        valid value. Defaults to `None`.
         `input_shape` will be ignored if the `input_tensor` is provided.
       alpha: Controls the width of the network. This is known as the width
         multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally
         decreases the number of filters in each layer. - If `alpha` > 1.0,
         proportionally increases the number of filters in each layer. - If
         `alpha` = 1, default number of filters from the paper are used at each
-        layer. Default to 1.0.
+        layer. Defaults to `1.0`.
       depth_multiplier: Depth multiplier for depthwise convolution. This is
-        called the resolution multiplier in the MobileNet paper. Default to 1.0.
-      dropout: Dropout rate. Default to 0.001.
+        called the resolution multiplier in the MobileNet paper.
+        Defaults to `1.0`.
+      dropout: Dropout rate. Defaults to `0.001`.
       include_top: Boolean, whether to include the fully-connected layer at the
-        top of the network. Default to `True`.
+        top of the network. Defaults to `True`.
       weights: One of `None` (random initialization), 'imagenet' (pre-training
-        on ImageNet), or the path to the weights file to be loaded. Default to
+        on ImageNet), or the path to the weights file to be loaded. Defaults to
         `imagenet`.
       input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
         use as image input for the model. `input_tensor` is useful for sharing
-        inputs between multiple different networks. Default to None.
+        inputs between multiple different networks. Defaults to `None`.
       pooling: Optional pooling mode for feature extraction when `include_top`
         is `False`.
         - `None` (default) means that the output of the model will be
@@ -154,7 +155,7 @@ def MobileNet(
         - `max` means that global max pooling will be applied.
       classes: Optional number of classes to classify images into, only to be
         specified if `include_top` is True, and if no `weights` argument is
-        specified. Defaults to 1000.
+        specified. Defaults to `1000`.
       classifier_activation: A `str` or callable. The activation function to use
         on the "top" layer. Ignored unless `include_top=True`. Set
         `classifier_activation=None` to return the logits of the "top" layer.

diff --git a/keras/applications/mobilenet_v3.py b/keras/applications/mobilenet_v3.py
@@ -679,10 +679,10 @@ def preprocess_input(x, data_format=None):
 
     Args:
       x: A floating point `numpy.array` or a `tf.Tensor`.
-      data_format: Optional data format of the image tensor/array. Defaults to
-        None, in which case the global setting
-        `tf.keras.backend.image_data_format()` is used (unless you changed it,
-        it defaults to "channels_last").{mode}
+      data_format: Optional data format of the image tensor/array. `None` means
+        the global setting `tf.keras.backend.image_data_format()` is used
+        (unless you changed it, it uses "channels_last").
+        Defaults to `None`.
 
     Returns:
       Unchanged `numpy.array` or `tf.Tensor`.

diff --git a/keras/backend.py b/keras/backend.py
@@ -1901,8 +1901,8 @@ class RandomGenerator(tf.__internal__.tracking.AutoTrackable):
         When `rng_type` is "legacy_stateful", the seed will be passed down to
         stateful random ops.
       rng_type: Type of RNG to use, one of "stateful", "stateless",
-        "legacy_stateful". It defaults to "stateful" if
-        `enable_tf_random_generator` has been activated, or to
+        "legacy_stateful". When `None` it uses "stateful" if
+        `enable_tf_random_generator` has been activated, or
         "legacy_stateful" otherwise.
         - When using "stateless", the random ops outputs are constant (the same
           inputs result in the same outputs).
@@ -1913,6 +1913,7 @@ class RandomGenerator(tf.__internal__.tracking.AutoTrackable):
         - "legacy_stateful" is backed by TF1 stateful RNG ops
           (e.g. `tf.random.uniform`), while "stateful"
           is backed by TF2 APIs (e.g. `tf.random.Generator.uniform`).
+        Defaults to `None`.
     """
 
     RNG_STATELESS = "stateless"
@@ -5566,8 +5567,12 @@ def categorical_crossentropy(target, output, from_logits=False, axis=-1):
             labels=target, logits=output, axis=axis
         )
 
-    # scale preds so that the class probas of each sample sum to 1
+    # Normalize the predictions so that the class
+    # probabilities of each sample sum to 1.
+    # This is needed to ensure that the cross entropy is
+    # computed correctly.
     output = output / tf.reduce_sum(output, axis, True)
+
     # Compute cross entropy from probabilities.
     epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
     output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_)
@@ -5844,28 +5849,29 @@ def binary_focal_crossentropy(
     where `alpha` is a float in the range of `[0, 1]`.
 
     Args:
-      target: A tensor with the same shape as `output`.
-      output: A tensor.
-      apply_class_balancing: A bool, whether to apply weight balancing on the
-        binary classes 0 and 1.
-      alpha: A weight balancing factor for class 1, default is `0.25` as
-        mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
-      gamma: A focusing parameter, default is `2.0` as mentioned in the
-        reference.
-      from_logits: Whether `output` is expected to be a logits tensor. By
-        default, we consider that `output` encodes a probability distribution.
+        target: A tensor with the same shape as `output`.
+        output: A tensor.
+        apply_class_balancing: A bool, whether to apply weight balancing on the
+            binary classes 0 and 1.
+        alpha: A weight balancing factor for class 1, default is `0.25` as
+            mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
+        gamma: A focusing parameter, default is `2.0` as mentioned in the
+            reference.
+        from_logits: Whether `output` is expected to be a logits tensor. By
+            default, we consider that `output` encodes a probability
+            distribution.
 
     Returns:
-      A tensor.
+        A tensor.
     """
-    sigmoidal = tf.__internal__.smart_cond.smart_cond(
-        from_logits,
-        lambda: sigmoid(output),
-        lambda: output,
-    )
+
+    sigmoidal = sigmoid(output) if from_logits else output
+
     p_t = target * sigmoidal + (1 - target) * (1 - sigmoidal)
+
     # Calculate focal factor
     focal_factor = tf.pow(1.0 - p_t, gamma)
+
     # Binary crossentropy
     bce = binary_crossentropy(
         target=target,
@@ -5893,7 +5899,7 @@ def sigmoid(x):
     Returns:
         A tensor.
     """
-    return tf.sigmoid(x)
+    return tf.math.sigmoid(x)
 
 
 @keras_export("keras.backend.hard_sigmoid")
@@ -6318,13 +6324,13 @@ def separable_conv1d(
     pointwise_kernel = tf.expand_dims(pointwise_kernel, 0)
     dilation_rate = (1,) + dilation_rate
 
-    x = tf.compat.v1.nn.separable_conv2d(
+    x = tf.nn.separable_conv2d(
         x,
         depthwise_kernel,
         pointwise_kernel,
         strides=strides,
         padding=padding,
-        rate=dilation_rate,
+        dilations=dilation_rate,
         data_format=tf_data_format,
     )
 
@@ -6384,13 +6390,13 @@ def separable_conv2d(
     else:
         strides = (1, 1) + strides
 
-    x = tf.compat.v1.nn.separable_conv2d(
+    x = tf.nn.separable_conv2d(
         x,
         depthwise_kernel,
         pointwise_kernel,
         strides=strides,
         padding=padding,
-        rate=dilation_rate,
+        dilations=dilation_rate,
         data_format=tf_data_format,
     )
     if data_format == "channels_first" and tf_data_format == "NHWC":
@@ -6439,12 +6445,12 @@ def depthwise_conv2d(
     else:
         strides = (1, 1) + strides
 
-    x = tf.compat.v1.nn.depthwise_conv2d(
+    x = tf.nn.depthwise_conv2d(
         x,
         depthwise_kernel,
         strides=strides,
         padding=padding,
-        rate=dilation_rate,
+        dilations=dilation_rate,
         data_format=tf_data_format,
     )
     if data_format == "channels_first" and tf_data_format == "NHWC":
@@ -6898,11 +6904,11 @@ def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
     Args:
         shape: A tuple of integers, the shape of tensor to create.
         mean: A float, the mean value of the normal distribution to draw
-          samples. Default to 0.0.
+          samples. Defaults to `0.0`.
         stddev: A float, the standard deviation of the normal distribution
-          to draw samples. Default to 1.0.
-        dtype: `tf.dtypes.DType`, dtype of returned tensor. Default to use Keras
-          backend dtype which is float32.
+          to draw samples. Defaults to `1.0`.
+        dtype: `tf.dtypes.DType`, dtype of returned tensor. `None` uses the
+          Keras backend dtype (float32 by default). Defaults to `None`.
         seed: Integer, random seed. Will use a random numpy integer when not
           specified.
 

diff --git a/keras/callbacks.py b/keras/callbacks.py
@@ -1889,9 +1889,21 @@ def on_train_begin(self, logs=None):
         self._training_state.restore()
 
     def on_train_batch_begin(self, batch, logs=None):
+        # Skip batch update for PSS Strategy
+        if isinstance(
+            self.model.distribute_strategy,
+            tf.distribute.ParameterServerStrategy,
+        ):
+            return
         self._training_state._ckpt_saved_batch.assign(batch)
 
     def on_train_batch_end(self, batch, logs=None):
+        # Skip batch update for PSS Strategy
+        if isinstance(
+            self.model.distribute_strategy,
+            tf.distribute.ParameterServerStrategy,
+        ):
+            return
         self._training_state.backup_if_preempted()
         if self.save_freq and self.save_freq != "epoch":
             self._batches_count += 1