From 2a704e1a69cc46c808209f45ce19f311b05fe19d Mon Sep 17 00:00:00 2001 From: IMvision12 <88665786+IMvision12@users.noreply.github.com> Date: Tue, 15 Aug 2023 04:55:44 +0530 Subject: [PATCH 01/17] [RandomTranslation] Supporting Segmentation Masks (#2024) * Added support for segmentation * Format * Update ReadMe * Demo * interpolation * Format * Format --- .../segmentation/random_translation_demo.py | 33 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- .../preprocessing/random_translation.py | 28 ++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 examples/layers/preprocessing/segmentation/random_translation_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_translation_demo.py b/examples/layers/preprocessing/segmentation/random_translation_demo.py new file mode 100644 index 0000000000..72abb9bcbb --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_translation_demo.py @@ -0,0 +1,33 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_translation_demo.py shows how to use the RandomTranslation +preprocessing layer. Uses the oxford iiit pet_dataset. In this +script the pets are loaded, then are passed through the +preprocessing layers. Finally, they are shown using matplotlib. 
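+The segmentation masks from the dataset are translated together with the
+images, since RandomTranslation now supports segmentation masks.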
+""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomcutout = preprocessing.RandomTranslation(0.5, 0.5) + ds = ds.map(randomcutout, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index fcfacd8cd3..b3fef9e96c 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -38,7 +38,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomSaturation | ✅ | ✅ | ✅ | ✅ | | RandomSharpness | ✅ | ✅ | ✅ | ✅ | | RandomShear | ✅ | ❌ | ✅ | ✅ | -| RandomTranslation | ✅ | ❌ | ✅ | ✅ | +| RandomTranslation | ✅ | ✅ | ✅ | ✅ | | RandomZoom | ✅ | ❌ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | | Rescaling | ❌ | ✅ | ✅ | ✅ | diff --git a/keras_cv/layers/preprocessing/random_translation.py b/keras_cv/layers/preprocessing/random_translation.py index 8b3a99a684..3fcb6d7daf 100644 --- a/keras_cv/layers/preprocessing/random_translation.py +++ b/keras_cv/layers/preprocessing/random_translation.py @@ -201,6 +201,34 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + segmentation_masks = preprocessing_utils.ensure_tensor( + segmentation_masks, self.compute_dtype + ) + original_shape = segmentation_masks.shape + mask_shape = tf.shape(segmentation_masks) + img_hd = tf.cast(mask_shape[H_AXIS], tf.float32) + img_wd = tf.cast(mask_shape[W_AXIS], tf.float32) + height_translations = transformations["height_translations"] + width_translations = transformations["width_translations"] + height_translations = height_translations * img_hd + width_translations = width_translations * img_wd + translations = tf.cast( + tf.concat([width_translations, height_translations], axis=1), + dtype=tf.float32, + ) + output = preprocessing_utils.transform( + segmentation_masks, + preprocessing_utils.get_translation_matrix(translations), + interpolation="nearest", + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + output.set_shape(original_shape) + return output + def augment_bounding_boxes( self, bounding_boxes, transformations, images=None, **kwargs ): From d611c83e73e86a829893eb1d64eb207ecfb4a37b Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:48:23 -0600 Subject: [PATCH 02/17] Restructure retinanet steps to avoid modifying args (#2029) --- .../object_detection/retinanet/retinanet.py | 59 +++++-------------- 1 file changed, 14 insertions(+), 45 deletions(-) diff --git a/keras_cv/models/object_detection/retinanet/retinanet.py b/keras_cv/models/object_detection/retinanet/retinanet.py index 4c9fc5909d..3dd42c185c 100644 --- a/keras_cv/models/object_detection/retinanet/retinanet.py +++ b/keras_cv/models/object_detection/retinanet/retinanet.py @@ -396,10 +396,18 @@ def compile( super().compile(loss=losses, **kwargs) def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): + y_for_label_encoder = bounding_box.convert_format( + y, + source=self.bounding_box_format, + target=self.label_encoder.bounding_box_format, + images=x, + ) + + boxes, classes = self.label_encoder(x, y_for_label_encoder) + box_pred = y_pred["box"] cls_pred = 
y_pred["classification"] - boxes = y["box"] - classes = y["classification"] + if boxes.shape[-1] != 4: raise ValueError( "boxes should have shape (None, None, 4). Got " @@ -453,50 +461,15 @@ def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): def train_step(self, *args): data = args[-1] + args = args[:-1] x, y = unpack_input(data) - - y_for_label_encoder = bounding_box.convert_format( - y, - source=self.bounding_box_format, - target=self.label_encoder.bounding_box_format, - images=x, - ) - - boxes, classes = self.label_encoder(x, y_for_label_encoder) - super_args = args[:-1] + ( - ( - x, - {"box": boxes, "classification": classes, "unencoded": y}, - ), - ) - - return super().train_step(*super_args) + return super().train_step(*args, (x, y)) def test_step(self, *args): data = args[-1] + args = args[:-1] x, y = unpack_input(data) - y_for_label_encoder = bounding_box.convert_format( - y, - source=self.bounding_box_format, - target=self.label_encoder.bounding_box_format, - images=x, - ) - boxes, classes = self.label_encoder(x, y_for_label_encoder) - boxes = bounding_box.convert_format( - boxes, - source=self.label_encoder.bounding_box_format, - target=self.bounding_box_format, - images=x, - ) - - super_args = args[:-1] + ( - ( - x, - {"box": boxes, "classification": classes, "unencoded": y}, - ), - ) - - return super().test_step(*super_args) + return super().test_step(*args, (x, y)) def compute_metrics(self, x, y, y_pred, sample_weight): metrics = {} @@ -505,10 +478,6 @@ def compute_metrics(self, x, y, y_pred, sample_weight): if not self._has_user_metrics: return metrics - # For computing non-loss metrics, we don't care about the encoded - # boxes and classes, just the raw input boxes. - y = y["unencoded"] - y_pred = self.decode_predictions(y_pred, x) for metric in self._user_metrics: From db57e1d0dab58e97c7cedfe1db2fe6a3dd7750db Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:17:19 -0600 Subject: [PATCH 03/17] Use non-ragged outputs in MultiClassNMS (#2030) * Use non-ragged outputs in MultiClassNMS Using Ragged outputs that weren't subsequently padded was causing issues in the PyCOCOCallback, and we shouldn't silently default to Ragged anywhere. * Update multi_class_non_max_suppression_test.py --- .../object_detection/multi_class_non_max_suppression.py | 2 +- .../multi_class_non_max_suppression_test.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py index 3d34eafbf2..7825268578 100644 --- a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py +++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py @@ -123,7 +123,7 @@ def call( } # this is required to comply with KerasCV bounding box format. 
return bounding_box.mask_invalid_detections( - bounding_boxes, output_ragged=True + bounding_boxes, output_ragged=False ) def get_config(self): diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py index f019e182d7..19eef623ff 100644 --- a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py +++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py @@ -47,6 +47,6 @@ def decode_predictions_output_shapes(): class NmsPredictionDecoderTest(TestCase): def test_decode_predictions_output_shapes(self): result = decode_predictions_output_shapes() - self.assertEqual(result["boxes"].shape, [8, None, 4]) - self.assertEqual(result["classes"].shape, [8, None]) - self.assertEqual(result["confidence"].shape, [8, None]) + self.assertEqual(result["boxes"].shape, [8, 100, 4]) + self.assertEqual(result["classes"].shape, [8, 100]) + self.assertEqual(result["confidence"].shape, [8, 100]) From 39278475b0751ce94d0f4c1c2673040b66ba17a1 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:42:11 -0600 Subject: [PATCH 04/17] Use correct convention for static shape in point_cloud grouping (#2028) * Use correct convention for static shape in point_cloud grouping * Use len(shape) for rank --- keras_cv/point_cloud/point_cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_cv/point_cloud/point_cloud.py b/keras_cv/point_cloud/point_cloud.py index 5e27e8b62c..ad345f915d 100644 --- a/keras_cv/point_cloud/point_cloud.py +++ b/keras_cv/point_cloud/point_cloud.py @@ -72,10 +72,10 @@ def group_points_by_boxes(points, boxes): box, all the point indices that belong to the box. 
""" - num_boxes = boxes.get_shape().as_list()[-2] or tf.shape(boxes)[-2] + num_boxes = boxes.shape[-2] or tf.shape(boxes)[-2] # [..., num_points] box_indices = within_box3d_index(points, boxes) - num_points = points.get_shape().as_list()[-2] or tf.shape(points)[-2] + num_points = points.shape[-2] or tf.shape(points)[-2] point_indices = tf.range(num_points, dtype=tf.int32) def group_per_sample(box_index): @@ -87,7 +87,7 @@ def group_per_sample(box_index): ) return res - boxes_rank = boxes.shape.rank + boxes_rank = len(boxes.shape) if boxes_rank == 2: return group_per_sample(box_indices) elif boxes_rank == 3: From 1eabccb30908a1a10143c0c46204443d7c7825d5 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Thu, 17 Aug 2023 07:46:23 +0530 Subject: [PATCH 05/17] Migrate Efficientnetlite to Backbone (#1877) * initial commit with moved files+added files * removed unwanted * added presets * backbone file changes * aliases file added * backbone fix * inits * format and lint * format and lint * add test+format and lint * update params * reviewed comments * fix * fix * updated test backbone * review changes from #1716 * fix * port * port:typo+fix * port: fix test error * port: update * fix backbone * fix preset in aliases * nits --- keras_cv/models/__init__.py | 18 + keras_cv/models/backbones/backbone_presets.py | 5 + .../backbones/efficientnet_lite/__init__.py | 13 + .../efficientnet_lite_aliases.py | 228 ++++++ .../efficientnet_lite_backbone.py | 366 ++++++++++ .../efficientnet_lite_backbone_presets.py | 175 +++++ ...efficientnet_lite_backbone_presets_test.py | 60 ++ .../efficientnet_lite_backbone_test.py | 162 +++++ keras_cv/models/legacy/__init__.py | 5 - keras_cv/models/legacy/efficientnet_lite.py | 678 ------------------ .../models/legacy/efficientnet_lite_test.py | 55 -- 11 files changed, 1027 insertions(+), 738 deletions(-) create mode 100644 keras_cv/models/backbones/efficientnet_lite/__init__.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py delete mode 100644 keras_cv/models/legacy/efficientnet_lite.py delete mode 100644 keras_cv/models/legacy/efficientnet_lite_test.py diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 3e5847a346..1861b49c03 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -43,6 +43,24 @@ from keras_cv.models.backbones.densenet.densenet_backbone import ( DenseNetBackbone, ) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB1Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB2Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB3Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB4Backbone, +) +from 
keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2B0Backbone, ) diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py index 1535d91374..3852b28877 100644 --- a/keras_cv/models/backbones/backbone_presets.py +++ b/keras_cv/models/backbones/backbone_presets.py @@ -16,6 +16,9 @@ from keras_cv.models.backbones.csp_darknet import csp_darknet_backbone_presets from keras_cv.models.backbones.densenet import densenet_backbone_presets +from keras_cv.models.backbones.efficientnet_lite import ( + efficientnet_lite_backbone_presets, +) from keras_cv.models.backbones.efficientnet_v2 import ( efficientnet_v2_backbone_presets, ) @@ -31,6 +34,7 @@ **csp_darknet_backbone_presets.backbone_presets_no_weights, **efficientnet_v2_backbone_presets.backbone_presets_no_weights, **densenet_backbone_presets.backbone_presets_no_weights, + **efficientnet_lite_backbone_presets.backbone_presets_no_weights, **yolo_v8_backbone_presets.backbone_presets_no_weights, } @@ -41,6 +45,7 @@ **csp_darknet_backbone_presets.backbone_presets_with_weights, **efficientnet_v2_backbone_presets.backbone_presets_with_weights, **densenet_backbone_presets.backbone_presets_with_weights, + **efficientnet_lite_backbone_presets.backbone_presets_with_weights, **yolo_v8_backbone_presets.backbone_presets_with_weights, } diff --git a/keras_cv/models/backbones/efficientnet_lite/__init__.py b/keras_cv/models/backbones/efficientnet_lite/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py new file mode 100644 index 0000000000..1a8fe92404 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py @@ -0,0 +1,228 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """Instantiates the {name} architecture. 
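+    This alias simply forwards its arguments to
+    `EfficientNetLiteBackbone.from_preset` for the matching preset.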
+ + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + Usage: + ```python + input_data = np.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone + model = {name}Backbone() + output = model(input_data) + ``` +""" # noqa: E501 + + +class EfficientNetLiteB0Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b0", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB1Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b1", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB2Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b2", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB3Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b3", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB4Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + 
kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b4", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +setattr( + EfficientNetLiteB0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB0"), +) +setattr( + EfficientNetLiteB1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB1"), +) +setattr( + EfficientNetLiteB2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB2"), +) +setattr( + EfficientNetLiteB3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB3"), +) +setattr( + EfficientNetLiteB4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB4"), +) diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py new file mode 100644 index 0000000000..d3a6fd8815 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py @@ -0,0 +1,366 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""EfficientNet Lite backbone model. + +Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) +""" # noqa: E501 + +import copy +import math + +from keras_cv.backend import keras +from keras_cv.models import utils +from keras_cv.models.backbones.backbone import Backbone +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.utils.python_utils import classproperty + +BN_AXIS = 3 + + +@keras.saving.register_keras_serializable(package="keras_cv.models") +class EfficientNetLiteBackbone(Backbone): + """Instantiates the EfficientNetLite architecture using given scaling + coefficients. + + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) + + Args: + include_rescaling: whether to rescale the inputs. If set to True, + inputs will be passed through a `Rescaling(1/255.0)` layer. + width_coefficient: float, scaling coefficient for network width. + depth_coefficient: float, scaling coefficient for network depth. + dropout_rate: float, dropout rate before final classifier layer. + drop_connect_rate: float, dropout rate at skip connections. The + default value is set to 0.2. 
+ depth_divisor: integer, a unit of network width. The default value + is set to 8. + activation: activation function. + input_shape: optional shape tuple, + It should have exactly 3 inputs channels. + input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) + to use as image input for the model. + + Usage: + ```python + # Construct an EfficientNetLite from a preset: + efficientnet = models.EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b0" + ) + images = np.ones((1, 256, 256, 3)) + outputs = efficientnet.predict(images) + + # Alternatively, you can also customize the EfficientNetLite architecture: + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + images = np.ones((1, 256, 256, 3)) + outputs = model.predict(images) + ``` + """ # noqa: E501 + + def __init__( + self, + *, + include_rescaling, + width_coefficient, + depth_coefficient, + stackwise_kernel_sizes, + stackwise_num_repeats, + stackwise_input_filters, + stackwise_output_filters, + stackwise_expansion_ratios, + stackwise_strides, + dropout_rate=0.2, + drop_connect_rate=0.2, + depth_divisor=8, + input_shape=(None, None, 3), + input_tensor=None, + activation="relu6", + **kwargs, + ): + img_input = utils.parse_model_inputs(input_shape, input_tensor) + + # Build stem + x = img_input + + if include_rescaling: + # Use common rescaling strategy across keras_cv + x = keras.layers.Rescaling(1.0 / 255.0)(x) + + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad" + )(x) + x = keras.layers.Conv2D( + 32, + 3, + strides=2, + padding="valid", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name="stem_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x) + x = keras.layers.Activation(activation, name="stem_activation")(x) + + # Build blocks + block_id = 0 + blocks = float(sum(stackwise_num_repeats)) + + pyramid_level_inputs = [] + + for i in range(len(stackwise_kernel_sizes)): + num_repeats = stackwise_num_repeats[i] + input_filters = stackwise_input_filters[i] + output_filters = stackwise_output_filters[i] + # Update block input and output filters based on depth multiplier. + input_filters = round_filters( + filters=input_filters, + width_coefficient=width_coefficient, + depth_divisor=depth_divisor, + ) + output_filters = round_filters( + filters=output_filters, + width_coefficient=width_coefficient, + depth_divisor=depth_divisor, + ) + + if i == 0 or i == (len(stackwise_kernel_sizes) - 1): + repeats = num_repeats + else: + repeats = round_repeats( + repeats=num_repeats, + depth_coefficient=depth_coefficient, + ) + strides = stackwise_strides[i] + + for j in range(repeats): + # The first block needs to take care of stride and filter size + # increase. + if j > 0: + strides = 1 + input_filters = output_filters + + if strides != 1: + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # 97 is the start of the lowercase alphabet. 
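+                # chr(j + 97) therefore yields "a", "b", "c", ..., giving each
+                # repeated block within a stack a distinct name suffix.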
+ letter_identifier = chr(j + 97) + x = apply_efficient_net_lite_block( + inputs=x, + filters_in=input_filters, + filters_out=output_filters, + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + expand_ratio=stackwise_expansion_ratios[i], + activation=activation, + dropout_rate=drop_connect_rate * block_id / blocks, + name="block{}{}_".format(i + 1, letter_identifier), + ) + block_id += 1 + + # Build top + x = keras.layers.Conv2D( + 1280, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name="top_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x) + x = keras.layers.Activation(activation, name="top_activation")(x) + + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # Create model. + super().__init__(inputs=img_input, outputs=x, **kwargs) + + self.include_rescaling = include_rescaling + self.width_coefficient = width_coefficient + self.depth_coefficient = depth_coefficient + self.dropout_rate = dropout_rate + self.drop_connect_rate = drop_connect_rate + self.depth_divisor = depth_divisor + self.activation = activation + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + self.stackwise_kernel_sizes = stackwise_kernel_sizes + self.stackwise_num_repeats = stackwise_num_repeats + self.stackwise_input_filters = stackwise_input_filters + self.stackwise_output_filters = stackwise_output_filters + self.stackwise_expansion_ratios = stackwise_expansion_ratios + self.stackwise_strides = stackwise_strides + + def get_config(self): + config = super().get_config() + config.update( + { + "include_rescaling": self.include_rescaling, + "width_coefficient": self.width_coefficient, + "depth_coefficient": self.depth_coefficient, + "dropout_rate": self.dropout_rate, + "drop_connect_rate": self.drop_connect_rate, + "depth_divisor": self.depth_divisor, + "activation": self.activation, + "input_tensor": self.input_tensor, + "input_shape": self.input_shape[1:], + "stackwise_kernel_sizes": self.stackwise_kernel_sizes, + "stackwise_num_repeats": self.stackwise_num_repeats, + "stackwise_input_filters": self.stackwise_input_filters, + "stackwise_output_filters": self.stackwise_output_filters, + "stackwise_expansion_ratios": self.stackwise_expansion_ratios, + "stackwise_strides": self.stackwise_strides, + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + +def conv_kernel_initializer(scale=2.0): + return keras.initializers.VarianceScaling( + scale=scale, mode="fan_out", distribution="truncated_normal" + ) + + +def round_filters(filters, depth_divisor, width_coefficient): + """Round number of filters based on depth multiplier.""" + filters *= width_coefficient + new_filters = max( + depth_divisor, + int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, + ) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += depth_divisor + return int(new_filters) + + +def round_repeats(repeats, depth_coefficient): + """Round number of repeats based on depth multiplier.""" + return int(math.ceil(depth_coefficient * repeats)) + + +def apply_efficient_net_lite_block( + inputs, + activation="relu6", + dropout_rate=0.0, + name=None, + filters_in=32, + filters_out=16, + kernel_size=3, + strides=1, + expand_ratio=1, +): + """An inverted residual block, without SE phase. 
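+    The squeeze-and-excitation phase of the standard EfficientNet block is
+    omitted in the Lite variant.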
+ + Args: + inputs: input tensor. + activation: activation function. + dropout_rate: float between 0 and 1, fraction of the input units to drop. + name: string, block label. + filters_in: integer, the number of input filters. + filters_out: integer, the number of output filters. + kernel_size: integer, the dimension of the convolution window. + strides: integer, the stride of the convolution. + expand_ratio: integer, scaling coefficient for the input filters. + + Returns: + output tensor for the block. + """ # noqa: E501 + if name is None: + name = f"block_{keras.backend.get_uid('block_')}_" + + # Expansion phase + filters = filters_in * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "expand_conv", + )(inputs) + x = keras.layers.BatchNormalization( + axis=BN_AXIS, name=name + "expand_bn" + )(x) + x = keras.layers.Activation( + activation, name=name + "expand_activation" + )(x) + else: + x = inputs + + # Depthwise Convolution + if strides == 2: + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, kernel_size), + name=name + "dwconv_pad", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + x = keras.layers.DepthwiseConv2D( + kernel_size, + strides=strides, + padding=conv_pad, + use_bias=False, + depthwise_initializer=conv_kernel_initializer(), + name=name + "dwconv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x) + x = keras.layers.Activation(activation, name=name + "activation")(x) + + # Output phase + x = keras.layers.Conv2D( + filters_out, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "project_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")( + x + ) + if strides == 1 and filters_in == filters_out: + if dropout_rate > 0: + x = keras.layers.Dropout( + dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" + )(x) + x = keras.layers.add([x, inputs], name=name + "add") + return x diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py new file mode 100644 index 0000000000..db9838e3de --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py @@ -0,0 +1,175 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""EfficientNetLite model preset configurations.""" + +backbone_presets_no_weights = { + "efficientnetlite_b0": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.0`." 
+ ), + "params": 3414176, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b1": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.1`." + ), + "params": 4190496, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b2": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.1` and `depth_coefficient=1.2`." + ), + "params": 4870320, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.1, + "depth_coefficient": 1.2, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b3": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.2` and `depth_coefficient=1.4`." 
+ ), + "params": 6994504, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b4": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.4` and `depth_coefficient=1.8`." + ), + "params": 11840256, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.4, + "depth_coefficient": 1.8, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, +} + +backbone_presets_with_weights = {} + +backbone_presets = { + **backbone_presets_no_weights, + **backbone_presets_with_weights, +} diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py new file mode 100644 index 0000000000..d4e783141e --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py @@ -0,0 +1,60 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +@pytest.mark.extra_large +class EfficientNetLitePresetFullTest(TestCase): + """ + Test the full enumeration of our preset. + This tests every preset for EfficientNetLite and is only run manually. 
+ Run with: + `pytest keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py --run_extra_large` + """ # noqa: E501 + + @parameterized.named_parameters( + *[(preset, preset) for preset in EfficientNetLiteBackbone.presets] + ) + def test_load_efficientnetlite(self, preset): + input_data = np.ones(shape=(2, 224, 224, 3)) + model = EfficientNetLiteBackbone.from_preset(preset) + model(input_data) + + def test_efficientnetlite_feature_extractor(self): + model = EfficientNetLiteB0Backbone( + include_rescaling=False, + input_shape=[256, 256, 3], + ) + levels = ["P3", "P4"] + layer_names = [model.pyramid_level_inputs[level] for level in levels] + backbone_model = get_feature_extractor(model, layer_names, levels) + inputs = keras.Input(shape=[256, 256, 3]) + outputs = backbone_model(inputs) + self.assertLen(outputs, 2) + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals(outputs["P3"].shape[:3], (None, 32, 32)) + self.assertEquals(outputs["P4"].shape[:3], (None, 16, 16)) diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py new file mode 100644 index 0000000000..195e6ea0cf --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py @@ -0,0 +1,162 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +class EfficientNetLiteBackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(8, 224, 224, 3)) + + def test_valid_call(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + model(self.input_batch) + + def test_valid_call_alias_model_with_rescaling(self): + model = EfficientNetLiteB0Backbone(include_rescaling=True) + model(self.input_batch) + + def test_valid_call_with_rescaling(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_lite_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, EfficientNetLiteBackbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_alias_model(self): + model = EfficientNetLiteB0Backbone() + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_lite_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + # Note that these aliases serialized as the base class + self.assertIsInstance(restored_model, EfficientNetLiteBackbone) + + # Check that output matches. 
+ restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + def test_feature_pyramid_inputs(self): + model = EfficientNetLiteB0Backbone() + backbone_model = get_feature_extractor( + model, + model.pyramid_level_inputs.values(), + model.pyramid_level_inputs.keys(), + ) + input_size = 256 + inputs = keras.Input(shape=[input_size, input_size, 3]) + outputs = backbone_model(inputs) + levels = ["P1", "P2", "P3", "P4", "P5"] + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals( + outputs["P1"].shape, + (None, input_size // 2**1, input_size // 2**1, 16), + ) + self.assertEquals( + outputs["P2"].shape, + (None, input_size // 2**2, input_size // 2**2, 24), + ) + self.assertEquals( + outputs["P3"].shape, + (None, input_size // 2**3, input_size // 2**3, 40), + ) + self.assertEquals( + outputs["P4"].shape, + (None, input_size // 2**4, input_size // 2**4, 112), + ) + self.assertEquals( + outputs["P5"].shape, + (None, input_size // 2**5, input_size // 2**5, 1280), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + self.assertEqual(model.output_shape, (None, None, None, 1280)) diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py index 794687b9c7..20df5826f0 100644 --- a/keras_cv/models/legacy/__init__.py +++ b/keras_cv/models/legacy/__init__.py @@ -24,11 +24,6 @@ from keras_cv.models.legacy.convnext import ConvNeXtXLarge from keras_cv.models.legacy.darknet import DarkNet21 from keras_cv.models.legacy.darknet import DarkNet53 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB0 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB1 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB2 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB3 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB4 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2 diff --git a/keras_cv/models/legacy/efficientnet_lite.py b/keras_cv/models/legacy/efficientnet_lite.py deleted file mode 100644 index a2ae8d3606..0000000000 --- a/keras_cv/models/legacy/efficientnet_lite.py +++ /dev/null @@ -1,678 +0,0 @@ -# Copyright 2023 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""EfficientNet Lite models for Keras. 
- -Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) -""" # noqa: E501 - -import copy -import math - -import tensorflow as tf -from keras import backend -from keras import layers -from tensorflow import keras - -from keras_cv.models.legacy import utils -from keras_cv.models.legacy.weights import parse_weights -from keras_cv.models.utils import correct_pad_downsample - -DEFAULT_BLOCKS_ARGS = [ - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 32, - "filters_out": 16, - "expand_ratio": 1, - "id_skip": True, - "strides": 1, - }, - { - "kernel_size": 3, - "repeats": 2, - "filters_in": 16, - "filters_out": 24, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 5, - "repeats": 2, - "filters_in": 24, - "filters_out": 40, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 3, - "repeats": 3, - "filters_in": 40, - "filters_out": 80, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 5, - "repeats": 3, - "filters_in": 80, - "filters_out": 112, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - }, - { - "kernel_size": 5, - "repeats": 4, - "filters_in": 112, - "filters_out": 192, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 192, - "filters_out": 320, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - }, -] -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - -DENSE_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 1.0 / 3.0, - "mode": "fan_out", - "distribution": "uniform", - }, -} - -BASE_DOCSTRING = """Instantiates the {name} architecture. - - Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - This function returns a Keras {name} model. - - For image classification use cases, see [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). - - Args: - include_rescaling: bool, whether to rescale the inputs. If set - to `True`, inputs will be passed through a `Rescaling(1/255.0)` - layer. - include_top: bool, whether to include the fully-connected layer at the - top of the network. If provided, `num_classes` must be provided. - num_classes: optional int, number of classes to classify images into - (only to be specified if `include_top` is `True`). - weights: one of `None` (random initialization), a pretrained weight file - path, or a reference to pre-trained weights (e.g. - 'imagenet/classification')(see available pre-trained weights in - weights.py) - input_shape: optional shape tuple, defaults to (None, None, 3). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional block. 
- - `avg` means that global average pooling will be applied to the - output of the last convolutional block, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. - name: (Optional) name to pass to the model, defaults to "{name}". - - Returns: - A `keras.Model` instance. -""" # noqa: E501 - -BN_AXIS = 3 - - -def round_filters(filters, depth_divisor, width_coefficient): - """Round number of filters based on depth multiplier.""" - filters *= width_coefficient - new_filters = max( - depth_divisor, - int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, - ) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += depth_divisor - return int(new_filters) - - -def round_repeats(repeats, depth_coefficient): - """Round number of repeats based on depth multiplier.""" - return int(math.ceil(depth_coefficient * repeats)) - - -def apply_efficient_net_lite_block( - inputs, - activation="relu6", - drop_rate=0.0, - name=None, - filters_in=32, - filters_out=16, - kernel_size=3, - strides=1, - expand_ratio=1, - id_skip=True, -): - """An inverted residual block, without SE phase. - - Args: - inputs: input tensor. - activation: activation function. - drop_rate: float between 0 and 1, fraction of the input units to drop. - name: string, block label. - filters_in: integer, the number of input filters. - filters_out: integer, the number of output filters. - kernel_size: integer, the dimension of the convolution window. - strides: integer, the stride of the convolution. - expand_ratio: integer, scaling coefficient for the input filters. - id_skip: boolean. - - Returns: - output tensor for the block. 
- """ - if name is None: - name = f"block_{backend.get_uid('block_')}_" - - # Expansion phase - filters = filters_in * expand_ratio - if expand_ratio != 1: - x = layers.Conv2D( - filters, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "expand_conv", - )(inputs) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "expand_bn")(x) - x = layers.Activation(activation, name=name + "expand_activation")(x) - else: - x = inputs - - # Depthwise Convolution - if strides == 2: - x = layers.ZeroPadding2D( - padding=correct_pad_downsample(x, kernel_size), - name=name + "dwconv_pad", - )(x) - conv_pad = "valid" - else: - conv_pad = "same" - x = layers.DepthwiseConv2D( - kernel_size, - strides=strides, - padding=conv_pad, - use_bias=False, - depthwise_initializer=CONV_KERNEL_INITIALIZER, - name=name + "dwconv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x) - x = layers.Activation(activation, name=name + "activation")(x) - - # Skip SE block - # Output phase - x = layers.Conv2D( - filters_out, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "project_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")(x) - if id_skip and strides == 1 and filters_in == filters_out: - if drop_rate > 0: - x = layers.Dropout( - drop_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" - )(x) - x = layers.add([x, inputs], name=name + "add") - return x - - -@keras.utils.register_keras_serializable(package="keras_cv.models") -class EfficientNetLite(keras.Model): - """Instantiates the EfficientNetLite architecture using given scaling - coefficients. - - Args: - include_rescaling: whether to rescale the inputs. If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: whether to include the fully-connected - layer at the top of the network. - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. - model_name: string, model name. - weights: one of `None` (random initialization), - or the path to the weights file to be loaded. - input_shape: optional shape tuple, - It should have exactly 3 inputs channels. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - num_classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. 
- - Returns: - A `keras.Model` instance. - - Raises: - ValueError: if `blocks_args` is invalid. - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `softmax` or `None` - when using a pretrained top layer. - """ - - def __init__( - self, - include_rescaling, - include_top, - width_coefficient, - depth_coefficient, - default_size, - dropout_rate=0.2, - drop_connect_rate=0.2, - depth_divisor=8, - activation="relu6", - blocks_args=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - num_classes=None, - classifier_activation="softmax", - **kwargs, - ): - if blocks_args is None: - blocks_args = DEFAULT_BLOCKS_ARGS - if not isinstance(blocks_args, list): - raise ValueError( - "The `blocks_args` argument should be either `None` or valid" - "list of dicts for building blocks. " - f"Received: blocks_args={blocks_args}" - ) - intact_blocks_args = copy.deepcopy(blocks_args) # for configs - blocks_args = copy.deepcopy(blocks_args) - - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to " - "the weights file to be loaded. " - f"Weights file not found at location: {weights}" - ) - - if include_top and not num_classes: - raise ValueError( - "If `include_top` is True, you should specify `num_classes`. " - f"Received: num_classes={num_classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. " - ) - - img_input = utils.parse_model_inputs(input_shape, input_tensor) - - # Build stem - x = img_input - - if include_rescaling: - # Use common rescaling strategy across keras_cv - x = layers.Rescaling(1.0 / 255.0)(x) - - x = layers.ZeroPadding2D( - padding=correct_pad_downsample(x, 3), name="stem_conv_pad" - )(x) - x = layers.Conv2D( - 32, - 3, - strides=2, - padding="valid", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name="stem_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x) - x = layers.Activation(activation, name="stem_activation")(x) - - # Build blocks - b = 0 - blocks = float(sum(args["repeats"] for args in blocks_args)) - - for i, args in enumerate(blocks_args): - assert args["repeats"] > 0 - # Update block input and output filters based on depth multiplier. - args["filters_in"] = round_filters( - filters=args["filters_in"], - width_coefficient=width_coefficient, - depth_divisor=depth_divisor, - ) - args["filters_out"] = round_filters( - filters=args["filters_out"], - width_coefficient=width_coefficient, - depth_divisor=depth_divisor, - ) - - if i == 0 or i == (len(blocks_args) - 1): - repeats = args.pop("repeats") - else: - repeats = round_repeats( - repeats=args.pop("repeats"), - depth_coefficient=depth_coefficient, - ) - - for j in range(repeats): - # The first block needs to take care of stride and filter size - # increase. 
- if j > 0: - args["strides"] = 1 - args["filters_in"] = args["filters_out"] - x = apply_efficient_net_lite_block( - x, - activation=activation, - drop_rate=drop_connect_rate * b / blocks, - name="block{}{}_".format(i + 1, chr(j + 97)), - **args, - ) - - b += 1 - - # Build top - x = layers.Conv2D( - 1280, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name="top_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x) - x = layers.Activation(activation, name="top_activation")(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - x = layers.Dense( - num_classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - name="predictions", - )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - inputs = img_input - - # Create model. - super().__init__(inputs=inputs, outputs=x, **kwargs) - - # Load weights. - if weights is not None: - self.load_weights(weights) - - self.include_rescaling = include_rescaling - self.include_top = include_top - self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient - self.default_size = default_size - self.dropout_rate = dropout_rate - self.drop_connect_rate = drop_connect_rate - self.depth_divisor = depth_divisor - self.activation = activation - self.blocks_args = intact_blocks_args - self.input_tensor = input_tensor - self.pooling = pooling - self.num_classes = num_classes - self.classifier_activation = classifier_activation - - def get_config(self): - return { - "include_rescaling": self.include_rescaling, - "include_top": self.include_top, - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, - "default_size": self.default_size, - "dropout_rate": self.dropout_rate, - "drop_connect_rate": self.drop_connect_rate, - "depth_divisor": self.depth_divisor, - "activation": self.activation, - "blocks_args": self.blocks_args, - # Remove batch dimension from `input_shape` - "input_shape": self.input_shape[1:], - "input_tensor": self.input_tensor, - "pooling": self.pooling, - "num_classes": self.num_classes, - "classifier_activation": self.classifier_activation, - "name": self.name, - "trainable": self.trainable, - } - - @classmethod - def from_config(cls, config): - return cls(**config) - - -def EfficientNetLiteB0( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=224, - dropout_rate=0.2, - name="efficientnetliteb0", - weights=parse_weights(weights, include_top, "efficientnetliteb0"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB1( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.1, - default_size=240, - dropout_rate=0.2, - 
name="efficientnetliteb1", - weights=parse_weights(weights, include_top, "efficientnetliteb1"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB2( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.1, - depth_coefficient=1.2, - default_size=260, - dropout_rate=0.3, - name="efficientnetliteb2", - weights=parse_weights(weights, include_top, "efficientnetliteb2"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB3( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.2, - depth_coefficient=1.4, - default_size=280, - dropout_rate=0.3, - name="efficientnetliteb3", - weights=parse_weights(weights, include_top, "efficientnetliteb3"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB4( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.4, - depth_coefficient=1.8, - default_size=300, - dropout_rate=0.3, - name="efficientnetliteb4", - weights=parse_weights(weights, include_top, "efficientnetliteb4"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -EfficientNetLiteB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB0") -EfficientNetLiteB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB1") -EfficientNetLiteB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB2") -EfficientNetLiteB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB3") -EfficientNetLiteB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB4") diff --git a/keras_cv/models/legacy/efficientnet_lite_test.py b/keras_cv/models/legacy/efficientnet_lite_test.py deleted file mode 100644 index daa1d0e2c0..0000000000 --- a/keras_cv/models/legacy/efficientnet_lite_test.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2023 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from absl.testing import parameterized - -from keras_cv.models.legacy import efficientnet_lite -from keras_cv.tests.test_case import TestCase - -from .models_test import ModelsTest - -MODEL_LIST = [ - (efficientnet_lite.EfficientNetLiteB0, 1280, {}), -] - -""" -Below are other configurations that we omit from our CI but that can/should -be tested manually when making changes to this model. -(efficientnet_lite.EfficientNetLiteB1, 1280, {}), -(efficientnet_lite.EfficientNetLiteB2, 1280, {}), -(efficientnet_lite.EfficientNetLiteB3, 1280, {}), -(efficientnet_lite.EfficientNetLiteB4, 1280, {}), -""" - - -class EfficientNetLiteTest(ModelsTest, TestCase): - @parameterized.parameters(*MODEL_LIST) - def test_application_base(self, app, _, args): - super()._test_application_base(app, _, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_with_rescaling(self, app, last_dim, args): - super()._test_application_with_rescaling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_pooling(self, app, last_dim, args): - super()._test_application_pooling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_variable_input_channels(self, app, last_dim, args): - super()._test_application_variable_input_channels(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) From 1602b17eed20b187e6bb0f4656f1fbeff57947b2 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Thu, 17 Aug 2023 20:47:24 +0530 Subject: [PATCH 06/17] Migrate Efficientnetv1 to Backbone (#1716) * created new files * moved old files to new one * efficientnetv1 backbone, presets, and imports updated * fix imports * fix imports * added preset unit test * updated backbone * fix init * fix legacy init * fix aliases presets * fix backbone round filter argument * fix depthwise conv * fix conv kernel initializer * fix depthwiseconv * fix scope name match pattern error * fix scope name * fix block name * remove block args preset * remove model_name * remove default_size from preset * updated test cases * updated docs * fix id_skip * fix test * fix format * reviewd comments * fix format * fix typo * fix naming * test with layer-matching * fix typo * fix format * backbone test updated * review changes * port * fix port * fix port 2 * port: fix argument * port: final fix * port:docs typo * port: update * review comment * format --- keras_cv/models/__init__.py | 27 + keras_cv/models/backbones/backbone_presets.py | 5 + .../backbones/efficientnet_v1/__init__.py | 13 + .../efficientnet_v1_aliases.py | 315 ++++++ .../efficientnet_v1_backbone.py | 454 +++++++++ .../efficientnet_v1_backbone_presets.py | 337 +++++++ .../efficientnet_v1_backbone_presets_test.py | 60 ++ .../efficientnet_v1_backbone_test.py | 198 ++++ keras_cv/models/legacy/__init__.py | 8 - keras_cv/models/legacy/efficientnet_v1.py | 937 ------------------ .../models/legacy/efficientnet_v1_test.py | 58 -- 11 files changed, 1409 insertions(+), 1003 deletions(-) create mode 100644 keras_cv/models/backbones/efficientnet_v1/__init__.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py create mode 100644 
keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py delete mode 100644 keras_cv/models/legacy/efficientnet_v1.py delete mode 100644 keras_cv/models/legacy/efficientnet_v1_test.py diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 1861b49c03..4191c07575 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -61,6 +61,33 @@ from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 EfficientNetLiteBackbone, ) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B0Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B1Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B2Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B3Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B4Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B5Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B6Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B7Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1Backbone, +) from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2B0Backbone, ) diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py index 3852b28877..614f85cd24 100644 --- a/keras_cv/models/backbones/backbone_presets.py +++ b/keras_cv/models/backbones/backbone_presets.py @@ -19,6 +19,9 @@ from keras_cv.models.backbones.efficientnet_lite import ( efficientnet_lite_backbone_presets, ) +from keras_cv.models.backbones.efficientnet_v1 import ( + efficientnet_v1_backbone_presets, +) from keras_cv.models.backbones.efficientnet_v2 import ( efficientnet_v2_backbone_presets, ) @@ -32,6 +35,7 @@ **resnet_v2_backbone_presets.backbone_presets_no_weights, **mobilenet_v3_backbone_presets.backbone_presets_no_weights, **csp_darknet_backbone_presets.backbone_presets_no_weights, + **efficientnet_v1_backbone_presets.backbone_presets_no_weights, **efficientnet_v2_backbone_presets.backbone_presets_no_weights, **densenet_backbone_presets.backbone_presets_no_weights, **efficientnet_lite_backbone_presets.backbone_presets_no_weights, @@ -43,6 +47,7 @@ **resnet_v2_backbone_presets.backbone_presets_with_weights, **mobilenet_v3_backbone_presets.backbone_presets_with_weights, **csp_darknet_backbone_presets.backbone_presets_with_weights, + **efficientnet_v1_backbone_presets.backbone_presets_with_weights, **efficientnet_v2_backbone_presets.backbone_presets_with_weights, **densenet_backbone_presets.backbone_presets_with_weights, **efficientnet_lite_backbone_presets.backbone_presets_with_weights, diff --git a/keras_cv/models/backbones/efficientnet_v1/__init__.py b/keras_cv/models/backbones/efficientnet_v1/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py new file mode 100644 index 0000000000..587c0e70ff --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py @@ -0,0 +1,315 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import ( + EfficientNetV1Backbone, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """Instantiates the {name} architecture. + + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. 
+""" # noqa: E501 + + +class EfficientNetV1B0Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b0", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B1Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b1", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B2Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b2", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B3Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b3", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B4Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b4", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B5Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": 
include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b5", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B6Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b6", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B7Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b7", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +setattr( + EfficientNetV1B0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B0"), +) +setattr( + EfficientNetV1B1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B1"), +) +setattr( + EfficientNetV1B2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B2"), +) +setattr( + EfficientNetV1B3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B3"), +) +setattr( + EfficientNetV1B4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B4"), +) +setattr( + EfficientNetV1B5Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B5"), +) +setattr( + EfficientNetV1B6Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B6"), +) +setattr( + EfficientNetV1B7Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B7"), +) diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py new file mode 100644 index 0000000000..cc39d3d31c --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py @@ -0,0 +1,454 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import copy
+import math
+
+from keras_cv.backend import keras
+from keras_cv.models import utils
+from keras_cv.models.backbones.backbone import Backbone
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone_presets import (  # noqa: E501
+    backbone_presets,
+)
+from keras_cv.utils.python_utils import classproperty
+
+
+@keras.saving.register_keras_serializable(package="keras_cv.models")
+class EfficientNetV1Backbone(Backbone):
+    """Instantiates the EfficientNetV1 architecture.
+
+    Reference:
+    - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+    (ICML 2019)
+    - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py)
+
+    Args:
+        include_rescaling: bool, whether to rescale the inputs. If set to
+            True, inputs will be passed through a `Rescaling(1/255.0)` layer.
+        width_coefficient: float, scaling coefficient for network width.
+        depth_coefficient: float, scaling coefficient for network depth.
+        dropout_rate: float, dropout rate before final classifier layer.
+        drop_connect_rate: float, dropout rate at skip connections. The default
+            value is set to 0.2.
+        depth_divisor: integer, a unit of network width. The default value is
+            set to 8.
+        activation: activation function to use between each convolutional layer.
+        input_shape: optional shape tuple, it should have exactly 3 input
+            channels.
+        input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) to
+            use as image input for the model.
+        stackwise_kernel_sizes: list of ints, the kernel sizes used for each
+            conv block.
+        stackwise_num_repeats: list of ints, number of times to repeat each
+            conv block.
+        stackwise_input_filters: list of ints, number of input filters for
+            each conv block.
+        stackwise_output_filters: list of ints, number of output filters for
+            each stack in the conv blocks model.
+        stackwise_expansion_ratios: list of floats, the expansion ratio applied
+            to the input filters of each conv block.
+        stackwise_strides: list of ints, strides for each conv block.
+        stackwise_squeeze_and_excite_ratios: list of floats, the squeeze and
+            excite ratios passed to the squeeze and excitation blocks.
+
+    Usage:
+    ```python
+    # Construct an EfficientNetV1 from a preset:
+    efficientnet = keras_cv.models.EfficientNetV1Backbone.from_preset(
+        "efficientnetv1_b0"
+    )
+    images = np.ones((1, 256, 256, 3))
+    outputs = efficientnet.predict(images)
+
+    # Alternatively, you can also customize the EfficientNetV1 architecture:
+    model = EfficientNetV1Backbone(
+        stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+        stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+        stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+        stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+        stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+        stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+        stackwise_squeeze_and_excite_ratios=[
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+        ],
+        width_coefficient=1.0,
+        depth_coefficient=1.0,
+        include_rescaling=False,
+    )
+    images = np.ones((1, 256, 256, 3))
+    outputs = model.predict(images)
+    ```
+    """  # noqa: E501
+
+    def __init__(
+        self,
+        *,
+        include_rescaling,
+        width_coefficient,
+        depth_coefficient,
+        stackwise_kernel_sizes,
+        stackwise_num_repeats,
+        stackwise_input_filters,
+        stackwise_output_filters,
+        stackwise_expansion_ratios,
+        stackwise_strides,
+        stackwise_squeeze_and_excite_ratios,
+        dropout_rate=0.2,
+        drop_connect_rate=0.2,
+        depth_divisor=8,
+        input_shape=(None, None, 3),
+        input_tensor=None,
+        activation="swish",
+        **kwargs,
+    ):
+        img_input = utils.parse_model_inputs(input_shape, input_tensor)
+
+        x = img_input
+
+        if include_rescaling:
+            # Use common rescaling strategy across keras_cv
+            x = keras.layers.Rescaling(1.0 / 255.0)(x)
+
+        x = keras.layers.ZeroPadding2D(
+            padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad"
+        )(x)
+
+        # Build stem
+        stem_filters = round_filters(
+            filters=stackwise_input_filters[0],
+            width_coefficient=width_coefficient,
+            divisor=depth_divisor,
+        )
+        x = keras.layers.Conv2D(
+            filters=stem_filters,
+            kernel_size=3,
+            strides=2,
+            padding="valid",
+            use_bias=False,
+            kernel_initializer=conv_kernel_initializer(),
+            name="stem_conv",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=3,
+            name="stem_bn",
+        )(x)
+        x = keras.layers.Activation(activation, name="stem_activation")(x)
+
+        # Build blocks
+        block_id = 0
+        blocks = float(sum(stackwise_num_repeats))
+
+        pyramid_level_inputs = []
+        for i in range(len(stackwise_kernel_sizes)):
+            num_repeats = stackwise_num_repeats[i]
+            input_filters = stackwise_input_filters[i]
+            output_filters = stackwise_output_filters[i]
+
+            # Update block input and output filters based on depth multiplier.
+            input_filters = round_filters(
+                filters=input_filters,
+                width_coefficient=width_coefficient,
+                divisor=depth_divisor,
+            )
+            output_filters = round_filters(
+                filters=output_filters,
+                width_coefficient=width_coefficient,
+                divisor=depth_divisor,
+            )
+
+            repeats = round_repeats(
+                repeats=num_repeats,
+                depth_coefficient=depth_coefficient,
+            )
+            strides = stackwise_strides[i]
+            squeeze_and_excite_ratio = stackwise_squeeze_and_excite_ratios[i]
+
+            for j in range(repeats):
+                # The first block needs to take care of stride and filter size
+                # increase.
+                if j > 0:
+                    strides = 1
+                    input_filters = output_filters
+
+                if strides != 1:
+                    pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+                # 97 is the start of the lowercase alphabet.
+ letter_identifier = chr(j + 97) + x = apply_efficientnet_block( + inputs=x, + filters_in=input_filters, + filters_out=output_filters, + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + expand_ratio=stackwise_expansion_ratios[i], + se_ratio=squeeze_and_excite_ratio, + activation=activation, + dropout_rate=drop_connect_rate * block_id / blocks, + name="block{}{}_".format(i + 1, letter_identifier), + ) + block_id += 1 + + # Build top + top_filters = round_filters( + filters=1280, + width_coefficient=width_coefficient, + divisor=depth_divisor, + ) + + x = keras.layers.Conv2D( + filters=top_filters, + kernel_size=1, + padding="same", + strides=1, + kernel_initializer=conv_kernel_initializer(), + use_bias=False, + name="top_conv", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name="top_bn", + )(x) + x = keras.layers.Activation( + activation=activation, name="top_activation" + )(x) + + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # Create model. + super().__init__(inputs=img_input, outputs=x, **kwargs) + + self.include_rescaling = include_rescaling + self.width_coefficient = width_coefficient + self.depth_coefficient = depth_coefficient + self.dropout_rate = dropout_rate + self.drop_connect_rate = drop_connect_rate + self.depth_divisor = depth_divisor + self.activation = activation + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + self.stackwise_kernel_sizes = stackwise_kernel_sizes + self.stackwise_num_repeats = stackwise_num_repeats + self.stackwise_input_filters = stackwise_input_filters + self.stackwise_output_filters = stackwise_output_filters + self.stackwise_expansion_ratios = stackwise_expansion_ratios + self.stackwise_strides = stackwise_strides + self.stackwise_squeeze_and_excite_ratios = ( + stackwise_squeeze_and_excite_ratios + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "include_rescaling": self.include_rescaling, + "width_coefficient": self.width_coefficient, + "depth_coefficient": self.depth_coefficient, + "dropout_rate": self.dropout_rate, + "drop_connect_rate": self.drop_connect_rate, + "depth_divisor": self.depth_divisor, + "activation": self.activation, + "input_tensor": self.input_tensor, + "input_shape": self.input_shape[1:], + "trainable": self.trainable, + "stackwise_kernel_sizes": self.stackwise_kernel_sizes, + "stackwise_num_repeats": self.stackwise_num_repeats, + "stackwise_input_filters": self.stackwise_input_filters, + "stackwise_output_filters": self.stackwise_output_filters, + "stackwise_expansion_ratios": self.stackwise_expansion_ratios, + "stackwise_strides": self.stackwise_strides, + "stackwise_squeeze_and_excite_ratios": ( + self.stackwise_squeeze_and_excite_ratios + ), + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + +def conv_kernel_initializer(scale=2.0): + return keras.initializers.VarianceScaling( + scale=scale, mode="fan_out", distribution="truncated_normal" + ) + + +def round_filters(filters, width_coefficient, divisor): + """Round number of filters based on depth multiplier. 
+ + Args: + filters: int, number of filters for Conv layer + width_coefficient: float, denotes the scaling coefficient of network + width + divisor: int, a unit of network width + + Returns: + int, new rounded filters value for Conv layer + """ + filters *= width_coefficient + new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, depth_coefficient): + """Round number of repeats based on depth multiplier. + + Args: + repeats: int, number of repeats of efficientnet block + depth_coefficient: float, denotes the scaling coefficient of network + depth + + Returns: + int, rounded repeats + """ + return int(math.ceil(depth_coefficient * repeats)) + + +def apply_efficientnet_block( + inputs, + filters_in=32, + filters_out=16, + kernel_size=3, + strides=1, + activation="swish", + expand_ratio=1, + se_ratio=0.0, + dropout_rate=0.0, + name="", +): + """An inverted residual block. + + Args: + inputs: Tensor, The input tensor of the block + filters_in: integer, the number of input filters. + filters_out: integer, the number of output filters. + kernel_size: integer, the dimension of the convolution window. + strides: integer, the stride of the convolution. + activation: activation function to use between each convolutional layer. + expand_ratio: integer, scaling coefficient for the input filters. + se_ratio: float between 0 and 1, fraction to squeeze the input filters. + dropout_rate: float between 0 and 1, fraction of the input units to drop. + name: string, block label. + + Returns: + output tensor for the block. + """ # noqa: E501 + filters = filters_in * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters=filters, + kernel_size=1, + strides=1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "expand_conv", + )(inputs) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "expand_bn", + )(x) + x = keras.layers.Activation( + activation, name=name + "expand_activation" + )(x) + else: + x = inputs + + # Depthwise Convolution + if strides == 2: + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, kernel_size), + name=name + "dwconv_pad", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + + x = keras.layers.DepthwiseConv2D( + kernel_size=kernel_size, + strides=strides, + padding=conv_pad, + use_bias=False, + depthwise_initializer=conv_kernel_initializer(), + name=name + "dwconv", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "dwconv_bn", + )(x) + x = keras.layers.Activation(activation, name=name + "dwconv_activation")(x) + + # Squeeze and Excitation phase + if 0 < se_ratio <= 1: + filters_se = max(1, int(filters_in * se_ratio)) + se = keras.layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) + se_shape = (1, 1, filters) + se = keras.layers.Reshape(se_shape, name=name + "se_reshape")(se) + se = keras.layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + kernel_initializer=conv_kernel_initializer(), + name=name + "se_reduce", + )(se) + se = keras.layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + kernel_initializer=conv_kernel_initializer(), + name=name + "se_expand", + )(se) + x = keras.layers.multiply([x, se], name=name + "se_excite") + + # Output phase + x = keras.layers.Conv2D( + filters=filters_out, + 
kernel_size=1, + strides=1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "project", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "project_bn", + )(x) + x = keras.layers.Activation(activation, name=name + "project_activation")(x) + + if strides == 1 and filters_in == filters_out: + if dropout_rate > 0: + x = keras.layers.Dropout( + dropout_rate, + noise_shape=(None, 1, 1, 1), + name=name + "drop", + )(x) + x = keras.layers.add([x, inputs], name=name + "add") + + return x diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py new file mode 100644 index 0000000000..a2aac81d26 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py @@ -0,0 +1,337 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""EfficientNetV1 model preset configurations.""" + +backbone_presets_no_weights = { + "efficientnetv1_b0": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.0`." + ), + "params": 4050716, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b1": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.1`." 
+ ), + "params": 6576704, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b2": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.1` and `depth_coefficient=1.2`." + ), + "params": 7770034, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.1, + "depth_coefficient": 1.2, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b3": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.2` and `depth_coefficient=1.4`." + ), + "params": 10785960, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b4": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.4` and `depth_coefficient=1.8`." 
+ ), + "params": 17676984, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.4, + "depth_coefficient": 1.8, + "dropout_rate": 0.4, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b5": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.6` and `depth_coefficient=2.2`." + ), + "params": 28517360, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.6, + "depth_coefficient": 2.2, + "dropout_rate": 0.4, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b6": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.8` and `depth_coefficient=2.6`." + ), + "params": 40965800, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.8, + "depth_coefficient": 2.6, + "dropout_rate": 0.5, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b7": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=2.0` and `depth_coefficient=3.1`." 
+            ),
+            "params": 64105488,
+            "official_name": "EfficientNetV1",
+            "path": "efficientnetv1",
+        },
+        "class_name": "keras_cv.models>EfficientNetV1Backbone",
+        "config": {
+            "width_coefficient": 2.0,
+            "depth_coefficient": 3.1,
+            "dropout_rate": 0.5,
+            "drop_connect_rate": 0.2,
+            "depth_divisor": 8,
+            "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+            "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+            "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+            "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+            "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+            "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+            "stackwise_squeeze_and_excite_ratios": [
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+            ],
+            "include_rescaling": True,
+            "input_shape": (None, None, 3),
+            "input_tensor": None,
+            "activation": "swish",
+        },
+    },
+}
+
+backbone_presets_with_weights = {}
+
+backbone_presets = {
+    **backbone_presets_no_weights,
+    **backbone_presets_with_weights,
+}
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
new file mode 100644
index 0000000000..fd73068311
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
@@ -0,0 +1,60 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+    EfficientNetV1B0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import (
+    EfficientNetV1Backbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+@pytest.mark.extra_large
+class EfficientNetV1PresetFullTest(TestCase):
+    """
+    Test the full enumeration of our presets.
+    This tests every preset for EfficientNetV1 and is only run manually.
+ Run with: + `pytest keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py --run_extra_large` + """ # noqa: E501 + + @parameterized.named_parameters( + *[(preset, preset) for preset in EfficientNetV1Backbone.presets] + ) + def test_load_efficientnet(self, preset): + input_data = np.ones(shape=(2, 224, 224, 3)) + model = EfficientNetV1Backbone.from_preset(preset) + model(input_data) + + def test_efficientnet_feature_extractor(self): + model = EfficientNetV1B0Backbone( + include_rescaling=False, + input_shape=[256, 256, 3], + ) + levels = ["P3", "P4"] + layer_names = [model.pyramid_level_inputs[level] for level in levels] + backbone_model = get_feature_extractor(model, layer_names, levels) + inputs = keras.Input(shape=[256, 256, 3]) + outputs = backbone_model(inputs) + self.assertLen(outputs, 2) + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals(outputs["P3"].shape[:3], (None, 32, 32)) + self.assertEquals(outputs["P4"].shape[:3], (None, 16, 16)) diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py new file mode 100644 index 0000000000..ac8a8dfa81 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py @@ -0,0 +1,198 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B0Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import ( + EfficientNetV1Backbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +class EfficientNetV1BackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(8, 224, 224, 3)) + + def test_valid_call(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + model(self.input_batch) + + def test_valid_call_alias_model_with_rescaling(self): + model = EfficientNetV1B0Backbone(include_rescaling=True) + model(self.input_batch) + + def test_valid_call_with_rescaling(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_v1_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, EfficientNetV1Backbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_alias_model(self): + model = EfficientNetV1B0Backbone() + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_v1_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + # Note that these aliases serialized as the base class + self.assertIsInstance(restored_model, EfficientNetV1Backbone) + + # Check that output matches. 
+ restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + def test_feature_pyramid_inputs(self): + model = EfficientNetV1B0Backbone() + backbone_model = get_feature_extractor( + model, + model.pyramid_level_inputs.values(), + model.pyramid_level_inputs.keys(), + ) + input_size = 256 + inputs = keras.Input(shape=[input_size, input_size, 3]) + outputs = backbone_model(inputs) + levels = ["P1", "P2", "P3", "P4", "P5"] + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals( + outputs["P1"].shape, + (None, input_size // 2**1, input_size // 2**1, 16), + ) + self.assertEquals( + outputs["P2"].shape, + (None, input_size // 2**2, input_size // 2**2, 24), + ) + self.assertEquals( + outputs["P3"].shape, + (None, input_size // 2**3, input_size // 2**3, 40), + ) + self.assertEquals( + outputs["P4"].shape, + (None, input_size // 2**4, input_size // 2**4, 112), + ) + self.assertEquals( + outputs["P5"].shape, + (None, input_size // 2**5, input_size // 2**5, 1280), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + self.assertEqual(model.output_shape, (None, None, None, 1280)) diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py index 20df5826f0..419ae34b31 100644 --- a/keras_cv/models/legacy/__init__.py +++ b/keras_cv/models/legacy/__init__.py @@ -24,14 +24,6 @@ from keras_cv.models.legacy.convnext import ConvNeXtXLarge from keras_cv.models.legacy.darknet import DarkNet21 from keras_cv.models.legacy.darknet import DarkNet53 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB3 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB4 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB5 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB6 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB7 from keras_cv.models.legacy.mlp_mixer import MLPMixerB16 from keras_cv.models.legacy.mlp_mixer import MLPMixerB32 from keras_cv.models.legacy.mlp_mixer import MLPMixerL16 diff --git a/keras_cv/models/legacy/efficientnet_v1.py b/keras_cv/models/legacy/efficientnet_v1.py deleted file mode 100644 index b91a63697a..0000000000 --- a/keras_cv/models/legacy/efficientnet_v1.py +++ /dev/null @@ -1,937 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""EfficientNet models for Keras. - -Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py) -""" # noqa: E501 - -import copy -import math - -import tensorflow as tf -from tensorflow import keras -from tensorflow.keras import backend -from tensorflow.keras import layers - -from keras_cv.models.legacy import utils -from keras_cv.models.legacy.weights import parse_weights - -DEFAULT_BLOCKS_ARGS = [ - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 32, - "filters_out": 16, - "expand_ratio": 1, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 2, - "filters_in": 16, - "filters_out": 24, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 2, - "filters_in": 24, - "filters_out": 40, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 3, - "filters_in": 40, - "filters_out": 80, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 3, - "filters_in": 80, - "filters_out": 112, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 4, - "filters_in": 112, - "filters_out": 192, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 192, - "filters_out": 320, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, -] - -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - -DENSE_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 1.0 / 3.0, - "mode": "fan_out", - "distribution": "uniform", - }, -} - -BASE_DOCSTRING = """Instantiates the {name} architecture. - - Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - This class represents a Keras image classification model. - - For image classification use cases, see - [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). - - Args: - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: bool, Whether to include the fully-connected layer at the - top of the network. - weights: One of `None` (random initialization), or the path to the - weights file to be loaded. - input_shape: tuple, Optional shape tuple. It should have exactly 3 - inputs channels. 
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. - pooling: Optional pooling mode for feature extraction when `include_top` - is `False`, defaults to None. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - - `avg` means that global average pooling will be applied to the - output of the last convolutional layer, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - num_classes: int, Optional number of classes to classify images into, - only to be specified if `include_top` is True, and if no `weights` - argument is specified, defaults to None. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. Defaults to 'softmax'. When loading pretrained weights, - `classifier_activation` can only be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. -""" # noqa: E501 - -BN_AXIS = 3 - - -def correct_pad(inputs, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling. - Args: - inputs: Input tensor. - kernel_size: An integer or tuple/list of 2 integers. - Returns: - A tuple. - """ - img_dim = 1 - input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)] - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if input_size[0] is None: - adjust = (1, 1) - else: - adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) - correct = (kernel_size[0] // 2, kernel_size[1] // 2) - return ( - (correct[0] - adjust[0], correct[0]), - (correct[1] - adjust[1], correct[1]), - ) - - -def apply_conv_bn( - x, - conv_type, - filters, - kernel_size, - strides=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation="swish", - name="", -): - """ - Represents Convolutional Block with optional Batch Normalization layer and - activation layer - - Args: - x: Tensor - conv_type: str, Type of Conv layer to be used in block. - - 'normal': The Conv2D layer will be used. - - 'depth': The DepthWiseConv2D layer will be used. - filters: int, The filter size of the Conv layer. It should be `None` - when `conv_type` is set as `depth` - kernel_size: int (or) tuple, The kernel size of the Conv layer. - strides: int (or) tuple, The stride value of Conv layer. - padding: str (or) callable, The type of padding for Conv layer. - use_bias: bool, Boolean to use bias for Conv layer. - kernel_initializer: dict (or) str (or) callable, The kernel initializer - for Conv layer. - bn_norm: bool, Boolean to add BatchNormalization layer after Conv layer. - activation: str (or) callable, Activation to be applied on the output at - the end. - name: str, name of the block - - Returns: - tf.Tensor - """ - if conv_type == "normal": - if filters is None or kernel_size is None: - raise ValueError( - "The filter size and kernel size should be set for Conv2D " - "layer." - ) - x = layers.Conv2D( - filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - name=name + "_conv", - )(x) - elif conv_type == "depth": - if filters is not None: - raise ValueError( - "Filter size shouldn't be set for DepthWiseConv2D layer." 
- ) - if kernel_size is None or strides is None: - raise ValueError( - "The kernel size and strides should be set for DepthWiseConv2D " - "layer." - ) - x = layers.DepthwiseConv2D( - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - depthwise_initializer=kernel_initializer, - name=name + "_dwconv", - )(x) - else: - raise ValueError( - "The 'conv_type' parameter should be set either to 'normal' or " - "'depth'" - ) - - if bn_norm: - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "_bn")(x) - if activation is not None: - x = layers.Activation(activation, name=name + "_activation")(x) - - return x - - -def apply_efficientnet_block( - inputs, - filters_in=32, - filters_out=16, - kernel_size=3, - strides=1, - activation="swish", - expand_ratio=1, - se_ratio=0.0, - id_skip=True, - drop_rate=0.0, - name="", -): - """An inverted residual block. - - Args: - inputs: Tensor, The input tensor of the block - filters_in: integer, the number of input filters. - filters_out: integer, the number of output filters. - kernel_size: integer, the dimension of the convolution window. - strides: integer, the stride of the convolution. - activation: activation function. - expand_ratio: integer, scaling coefficient for the input filters. - se_ratio: float between 0 and 1, fraction to squeeze the input filters. - id_skip: boolean. - drop_rate: float between 0 and 1, fraction of the input units to drop. - name: string, block label. - - Returns: - tf.Tensor - """ - filters = filters_in * expand_ratio - if expand_ratio != 1: - x = apply_conv_bn( - x=inputs, - conv_type="normal", - filters=filters, - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name=name + "_expand", - ) - else: - x = inputs - - # Depthwise Convolution - if strides == 2: - x = layers.ZeroPadding2D( - padding=correct_pad(x, kernel_size), - name=name + "_dwconv_pad", - )(x) - conv_pad = "valid" - else: - conv_pad = "same" - - x = apply_conv_bn( - x=x, - conv_type="depth", - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=conv_pad, - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name=name, - ) - - # Squeeze and Excitation phase - if 0 < se_ratio <= 1: - filters_se = max(1, int(filters_in * se_ratio)) - se = layers.GlobalAveragePooling2D(name=name + "_se_squeeze")(x) - if BN_AXIS == 1: - se_shape = (filters, 1, 1) - else: - se_shape = (1, 1, filters) - se = layers.Reshape(se_shape, name=name + "_se_reshape")(se) - se = layers.Conv2D( - filters_se, - 1, - padding="same", - activation=activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "_se_reduce", - )(se) - se = layers.Conv2D( - filters, - 1, - padding="same", - activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "_se_expand", - )(se) - x = layers.multiply([x, se], name=name + "_se_excite") - - # Output phase - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=filters_out, - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=None, - name=name + "_project", - ) - - if id_skip and strides == 1 and filters_in == filters_out: - if drop_rate > 0: - x = layers.Dropout( - drop_rate, - noise_shape=(None, 1, 1, 1), - name=name + "_drop", - )(x) - x = layers.add([x, inputs], name=name + "_add") - - return x - - -@keras.utils.register_keras_serializable(package="keras_cv.models") 
-class EfficientNet(keras.Model): - """This class represents a Keras EfficientNet architecture. - Args: - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: bool, whether to include the fully-connected layer at the - top of the network. - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. - model_name: string, model name. - weights: one of `None` (random initialization), or the path to the - weights file to be loaded. - input_shape: optional shape tuple, it should have exactly 3 input - channels. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. - pooling: optional pooling mode for feature extraction when `include_top` - is `False`. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - - `avg` means that global average pooling will be applied to the - output of the last convolutional layer, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - num_classes: optional number of classes to classify images into, - only to be specified if `include_top` is True, and if no `weights` - argument is specified. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. - Returns: - A `keras.Model` instance. - Raises: - ValueError: in case of invalid argument for `weights`, or invalid input - shape. - ValueError: if `classifier_activation` is not `softmax` or `None` when - using a pretrained top layer. - """ - - def __init__( - self, - include_rescaling, - include_top, - width_coefficient, - depth_coefficient, - default_size, - dropout_rate=0.2, - drop_connect_rate=0.2, - depth_divisor=8, - activation="swish", - blocks_args="default", - model_name="efficientnet", - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - num_classes=None, - classifier_activation="softmax", - **kwargs, - ): - blocks_args_type = blocks_args - - if blocks_args == "default": - blocks_args = DEFAULT_BLOCKS_ARGS - - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to " - "the weights file to be loaded. Weights file not found at " - f"location: {weights}" - ) - - if include_top and not num_classes: - raise ValueError( - "If `include_top` is True, you should specify `num_classes`. " - f"Received: num_classes={num_classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. 
" - ) - - img_input = utils.parse_model_inputs(input_shape, input_tensor) - - # Build stem - x = img_input - - if include_rescaling: - # Use common rescaling strategy across keras_cv - x = layers.Rescaling(1.0 / 255.0)(x) - - x = layers.ZeroPadding2D( - padding=correct_pad(x, 3), name="stem_conv_pad" - )(x) - - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=EfficientNet.round_filters( - 32, width_coefficient, depth_divisor - ), - kernel_size=3, - strides=2, - padding="valid", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name="stem", - ) - - # Build blocks - blocks_args = copy.deepcopy(blocks_args) - - b = 0 - blocks = float( - sum( - EfficientNet.round_repeats(args["repeats"], depth_coefficient) - for args in blocks_args - ) - ) - for i, args in enumerate(blocks_args): - assert args["repeats"] > 0 - # Update block input and output filters based on depth multiplier. - args["filters_in"] = EfficientNet.round_filters( - args["filters_in"], width_coefficient, depth_divisor - ) - args["filters_out"] = EfficientNet.round_filters( - args["filters_out"], width_coefficient, depth_divisor - ) - - for j in range( - EfficientNet.round_repeats( - args.pop("repeats"), depth_coefficient - ) - ): - # The first block needs to take care of stride and filter size - # increase. - if j > 0: - args["strides"] = 1 - args["filters_in"] = args["filters_out"] - x = apply_efficientnet_block( - inputs=x, - activation=activation, - drop_rate=drop_connect_rate * b / blocks, - name="block{}{}".format(i + 1, chr(j + 97)), - **args, - ) - b += 1 - - # Build top - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=self.round_filters(1280, width_coefficient, depth_divisor), - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name="top", - ) - - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - x = layers.Dense( - num_classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - name="predictions", - )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - inputs = img_input - - # Create model. - super().__init__(inputs=inputs, outputs=x, name=model_name, **kwargs) - - # Load weights. - if weights is not None: - self.load_weights(weights) - - self.include_rescaling = include_rescaling - self.include_top = include_top - self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient - self.default_size = default_size - self.dropout_rate = dropout_rate - self.drop_connect_rate = drop_connect_rate - self.depth_divisor = depth_divisor - self.activation = activation - self.blocks_args = blocks_args_type - self.input_tensor = input_tensor - self.pooling = pooling - self.num_classes = num_classes - self.classifier_activation = classifier_activation - - @staticmethod - def round_filters(filters, width_coefficient, divisor): - """Round number of filters based on depth multiplier. 
- Args: - filters: int, number of filters for Conv layer - width_coefficient: float, denotes the scaling coefficient of network - width - divisor: int, a unit of network width - - Returns: - int, new rounded filters value for Conv layer - """ - filters *= width_coefficient - new_filters = max( - divisor, int(filters + divisor / 2) // divisor * divisor - ) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += divisor - return int(new_filters) - - @staticmethod - def round_repeats(repeats, depth_coefficient): - """Round number of repeats based on depth multiplier. - Args: - repeats: int, number of repeats of efficientnet block - depth_coefficient: float, denotes the scaling coefficient of network - depth - - Returns: - int, rounded repeats - """ - return int(math.ceil(depth_coefficient * repeats)) - - def get_config(self): - return { - "include_rescaling": self.include_rescaling, - "include_top": self.include_top, - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, - "default_size": self.default_size, - "dropout_rate": self.dropout_rate, - "drop_connect_rate": self.drop_connect_rate, - "depth_divisor": self.depth_divisor, - "activation": self.activation, - "blocks_args": self.blocks_args, - "input_tensor": self.input_tensor, - "input_shape": self.input_shape[1:], - "model_name": self.name, - "pooling": self.pooling, - "num_classes": self.num_classes, - "classifier_activation": self.classifier_activation, - "trainable": self.trainable, - } - - @classmethod - def from_config(cls, config): - return cls(**config) - - -def EfficientNetB0( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb0", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=224, - dropout_rate=0.2, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb0"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB1( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb1", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.1, - default_size=240, - dropout_rate=0.2, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb1"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB2( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb2", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.1, - depth_coefficient=1.2, - default_size=260, - dropout_rate=0.3, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb2"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - 
**kwargs, - ) - - -def EfficientNetB3( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb3", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.2, - depth_coefficient=1.4, - default_size=300, - dropout_rate=0.3, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb3"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB4( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb4", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.4, - depth_coefficient=1.8, - default_size=380, - dropout_rate=0.4, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb4"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB5( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb5", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.6, - depth_coefficient=2.2, - default_size=456, - dropout_rate=0.4, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb5"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB6( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb6", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.8, - depth_coefficient=2.6, - default_size=528, - dropout_rate=0.5, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb6"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB7( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb7", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=2.0, - depth_coefficient=3.1, - default_size=600, - dropout_rate=0.5, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb7"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB0") -EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB1") -EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB2") -EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB3") -EfficientNetB4.__doc__ = 
BASE_DOCSTRING.format(name="EfficientNetB4") -EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB5") -EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB6") -EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB7") diff --git a/keras_cv/models/legacy/efficientnet_v1_test.py b/keras_cv/models/legacy/efficientnet_v1_test.py deleted file mode 100644 index 7615075618..0000000000 --- a/keras_cv/models/legacy/efficientnet_v1_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from absl.testing import parameterized - -from keras_cv.models.legacy import efficientnet_v1 -from keras_cv.tests.test_case import TestCase - -from .models_test import ModelsTest - -MODEL_LIST = [ - (efficientnet_v1.EfficientNetB0, 1280, {}), -] - -""" -Below are other configurations that we omit from our CI but that can/should -be tested manually when making changes to this model. -(efficientnet_v1.EfficientNetB1, 1280, {}), -(efficientnet_v1.EfficientNetB2, 1408, {}), -(efficientnet_v1.EfficientNetB3, 1536, {}), -(efficientnet_v1.EfficientNetB4, 1792, {}), -(efficientnet_v1.EfficientNetB5, 2048, {}), -(efficientnet_v1.EfficientNetB6, 2304, {}), -(efficientnet_v1.EfficientNetB7, 2560, {}), -""" - - -class EfficientNetV1Test(ModelsTest, TestCase): - @parameterized.parameters(*MODEL_LIST) - def test_application_base(self, app, _, args): - super()._test_application_base(app, _, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_with_rescaling(self, app, last_dim, args): - super()._test_application_with_rescaling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_pooling(self, app, last_dim, args): - super()._test_application_pooling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_variable_input_channels(self, app, last_dim, args): - super()._test_application_variable_input_channels(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) From 118f502abd38d5b520c0054972e2a2081915d871 Mon Sep 17 00:00:00 2001 From: Bhavesh Misra Date: Thu, 17 Aug 2023 22:46:24 +0530 Subject: [PATCH 07/17] Issue_1957 Returning the Matplotlib plt object in the plot_bounding_box_gallery.py file (#2000) * Updating_the plot_bounding_box_gallery.py * Tried_plot_bounding_box_gallery * Tried_plot_bounding_box_gallery * Trying_passing_classmapping * returning_plt_object_done * returning_plt_object_done * Done_hopefully * Done_Hopefully_fnal_2 * linting * Revert "linting" This reverts commit 64e7e2ab376b8b3505b9c961bf55259b033174c7. 
I made a mistake lol * Linting * Linting_Donee * Conditional_Removed --- examples/visualization/plot_image_gallery.py | 2 +- keras_cv/visualization/plot_bounding_box_gallery.py | 2 +- keras_cv/visualization/plot_image_gallery.py | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/visualization/plot_image_gallery.py b/examples/visualization/plot_image_gallery.py index 17197aca4d..be8131643c 100644 --- a/examples/visualization/plot_image_gallery.py +++ b/examples/visualization/plot_image_gallery.py @@ -5,7 +5,7 @@ Date created: 2022/10/16 Last modified: 2022/06/24 Description: Visualize ground truth and predicted bounding boxes for a given - dataset. + dataset. """ """ diff --git a/keras_cv/visualization/plot_bounding_box_gallery.py b/keras_cv/visualization/plot_bounding_box_gallery.py index 1f6bd5cf64..73112458bd 100644 --- a/keras_cv/visualization/plot_bounding_box_gallery.py +++ b/keras_cv/visualization/plot_bounding_box_gallery.py @@ -174,7 +174,7 @@ def unpackage_tfds_inputs(inputs): ), ] - plot_image_gallery( + return plot_image_gallery( plotted_images, value_range, legend_handles=legend_handles, diff --git a/keras_cv/visualization/plot_image_gallery.py b/keras_cv/visualization/plot_image_gallery.py index 1d98c20f53..05cbbad796 100644 --- a/keras_cv/visualization/plot_image_gallery.py +++ b/keras_cv/visualization/plot_image_gallery.py @@ -117,9 +117,6 @@ def plot_image_gallery( """ assert_matplotlib_installed("plot_bounding_box_gallery") - if path is None and show is None: - # Default to showing the image - show = True if path is not None and show: raise ValueError( "plot_gallery() expects either `path` to be set, or `show` " @@ -178,8 +175,9 @@ def plot_image_gallery( current_axis.margins(x=0, y=0) current_axis.axis("off") - if path is None and not show: - return + if path is None and show is None: + return fig + if path is not None: plt.savefig( fname=path, From 38381bad0496129afdf22a6b9f79ce5651b38bc5 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Fri, 18 Aug 2023 02:04:51 +0530 Subject: [PATCH 08/17] [EfficientNetV2 Backbone] Style fix (#2031) * preset+style fix * fix --- .../efficientnet_v2_aliases.py | 99 ++++++++----------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py index 6489d5b65b..f338874982 100644 --- a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py +++ b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py @@ -20,12 +20,9 @@ from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501 backbone_presets, ) -from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501 - backbone_presets_with_weights, -) from keras_cv.utils.python_utils import classproperty -ALIAS_BASE_DOCSTRING = """Instantiates the {name} architecture. +ALIAS_DOCSTRING = """Instantiates the {name} architecture. 
Reference: - [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298) @@ -64,8 +61,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_s": copy.deepcopy( - backbone_presets["efficientnetv2_s"] + "efficientnetv2_s_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_s_imagenet"] ), } @@ -73,11 +70,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_s_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_s_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2MBackbone") @@ -102,11 +95,7 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return { - "efficientnetv2_m": copy.deepcopy( - backbone_presets["efficientnetv2_m"] - ), - } + return {} @classproperty def presets_with_weights(cls): @@ -137,11 +126,7 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return { - "efficientnetv2_l": copy.deepcopy( - backbone_presets["efficientnetv2_l"] - ), - } + return {} @classproperty def presets_with_weights(cls): @@ -173,8 +158,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b0": copy.deepcopy( - backbone_presets["efficientnetv2_b0"] + "efficientnetv2_b0_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b0_imagenet"] ), } @@ -182,11 +167,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b0_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b0_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B1Backbone") @@ -212,8 +193,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b1": copy.deepcopy( - backbone_presets["efficientnetv2_b1"] + "efficientnetv2_b1_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b1_imagenet"] ), } @@ -221,11 +202,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b1_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b1_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B2Backbone") @@ -251,8 +228,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b2": copy.deepcopy( - backbone_presets["efficientnetv2_b2"] + "efficientnetv2_b2_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b2_imagenet"] ), } @@ -260,11 +237,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b2_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b2_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B3Backbone") @@ -298,24 +271,38 @@ def presets_with_weights(cls): return {} -EfficientNetV2B0Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B0" +setattr( + EfficientNetV2SBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2S"), ) -EfficientNetV2B1Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - 
name="EfficientNetV2B1" +setattr( + EfficientNetV2MBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2M"), ) -EfficientNetV2B2Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B2" +setattr( + EfficientNetV2LBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2L"), ) -EfficientNetV2B3Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B3" +setattr( + EfficientNetV2B0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B0"), ) -EfficientNetV2SBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2S" +setattr( + EfficientNetV2B1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B1"), ) -EfficientNetV2MBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2M" +setattr( + EfficientNetV2B2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B2"), ) -EfficientNetV2LBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2L" +setattr( + EfficientNetV2B3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B3"), ) From 5373b916d15544a6763347575d440570ec617495 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Fri, 18 Aug 2023 13:30:37 -0600 Subject: [PATCH 09/17] Add pre-trained MobileNetV3Small preset (#2034) --- .../mobilenet_v3_backbone_presets.py | 17 +++++++++++++++++ .../mobilenet_v3_backbone_presets_test.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py index f7cb46171c..8f350fd7b5 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py @@ -200,6 +200,23 @@ "weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_large_imagenet_backbone.h5", # noqa: E501 "weights_hash": "ec55ea2f4f4ee9a2ddf3ee8e2dd784e9d5732690c1fc5afc7e1b2a66703f3337", # noqa: E501 }, + "mobilenet_v3_small_imagenet": { + "metadata": { + "description": ( + "MobileNetV3 model with 28 layers where the batch " + "normalization and hard-swish activation are applied after the " + "convolution layers. " + "Pre-trained on the ImageNet 2012 classification task." 
+ ), + "params": 2_994_518, + "official_name": "MobileNetV3", + "path": "mobilenetv3", + }, + "class_name": "keras_cv>MobileNetV3Backbone", + "config": backbone_presets_no_weights["mobilenet_v3_small"]["config"], + "weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_small_imagenet_backbone.h5", # noqa: E501 + "weights_hash": "592c2707edfc6c673a3b2d9aaf76dee678557f4a32d573c74f96c8122effa503", # noqa: E501 + }, } backbone_presets = { diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py index 2d36e60166..aa0c806aab 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py @@ -36,7 +36,7 @@ def setUp(self): self.input_batch = np.ones(shape=(8, 224, 224, 3)) def test_backbone_output(self): - model = MobileNetV3Backbone.from_preset("mobilenet_v3_large_imagenet") + model = MobileNetV3Backbone.from_preset("mobilenet_v3_small_imagenet") outputs = model(self.input_batch) # The forward pass from a preset should be stable! @@ -45,7 +45,7 @@ def test_backbone_output(self): # We should only update these numbers if we are updating a weights # file, or have found a discrepancy with the upstream source. outputs = outputs[0, 0, 0, :5] - expected = [0.27, 0.01, 0.29, 0.08, -0.12] + expected = [0.25, 1.13, -0.26, 0.10, 0.03] # Keep a high tolerance, so we are robust to different hardware. self.assertAllClose( ops.convert_to_numpy(outputs), expected, atol=0.01, rtol=0.01 From 30bbd60ac455d0415814815d4ce4e0cb5cbdee94 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Mon, 21 Aug 2023 22:02:06 +0530 Subject: [PATCH 10/17] alias fix + doc fix in preset (#2035) --- .../models/backbones/mobilenet_v3/mobilenet_v3_aliases.py | 8 ++++++-- .../mobilenet_v3/mobilenet_v3_backbone_presets.py | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py index 9d9c91e8e0..da19b81f28 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py @@ -73,12 +73,16 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return {} + return { + "mobilenet_v3_small_imagenet": copy.deepcopy( + backbone_presets["mobilenet_v3_small_imagenet"] + ), + } @classproperty def presets_with_weights(cls): """Dictionary of preset names and configurations.""" - return {} + return cls.presets @keras_cv_export("keras_cv.models.MobileNetV3LargeBackbone") diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py index 8f350fd7b5..75ad436fe1 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py @@ -191,7 +191,7 @@ "convolution layers. " "Pre-trained on the ImageNet 2012 classification task." 
), - "params": 2_994_518, + "params": 2994518, "official_name": "MobileNetV3", "path": "mobilenetv3", }, @@ -203,12 +203,12 @@ "mobilenet_v3_small_imagenet": { "metadata": { "description": ( - "MobileNetV3 model with 28 layers where the batch " + "MobileNetV3 model with 14 layers where the batch " "normalization and hard-swish activation are applied after the " "convolution layers. " "Pre-trained on the ImageNet 2012 classification task." ), - "params": 2_994_518, + "params": 933502, "official_name": "MobileNetV3", "path": "mobilenetv3", }, From 96ac1ee72ba27ca0e194886d0d9baa3183630200 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Mon, 21 Aug 2023 22:34:56 +0530 Subject: [PATCH 11/17] keras.layers.add->keras.layers.Add (#2033) --- keras_cv/layers/fusedmbconv.py | 2 +- keras_cv/layers/mbconv.py | 2 +- .../backbones/efficientnet_lite/efficientnet_lite_backbone.py | 2 +- .../backbones/efficientnet_v1/efficientnet_v1_backbone.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_cv/layers/fusedmbconv.py b/keras_cv/layers/fusedmbconv.py index 489c619dbb..2ac33a54df 100644 --- a/keras_cv/layers/fusedmbconv.py +++ b/keras_cv/layers/fusedmbconv.py @@ -218,7 +218,7 @@ def call(self, inputs): if self.strides == 1 and self.input_filters == self.output_filters: if self.survival_probability: x = self.dropout(x) - x = keras.layers.add([x, inputs], name=self.name + "add") + x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x def get_config(self): diff --git a/keras_cv/layers/mbconv.py b/keras_cv/layers/mbconv.py index fba5b0194d..34a7e0c8d2 100644 --- a/keras_cv/layers/mbconv.py +++ b/keras_cv/layers/mbconv.py @@ -226,7 +226,7 @@ def call(self, inputs): if self.strides == 1 and self.input_filters == self.output_filters: if self.survival_probability: x = self.dropout(x) - x = keras.layers.add([x, inputs], name=self.name + "add") + x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x def get_config(self): diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py index d3a6fd8815..fe8eab1a08 100644 --- a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py @@ -362,5 +362,5 @@ def apply_efficient_net_lite_block( x = keras.layers.Dropout( dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" )(x) - x = keras.layers.add([x, inputs], name=name + "add") + x = keras.layers.Add(name=name + "add")([x, inputs]) return x diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py index cc39d3d31c..c90bebf198 100644 --- a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py @@ -449,6 +449,6 @@ def apply_efficientnet_block( noise_shape=(None, 1, 1, 1), name=name + "drop", )(x) - x = keras.layers.add([x, inputs], name=name + "add") + x = keras.layers.Add(name=name + "add")([x, inputs]) return x From f3c629451a59519ef7714d5fb1bf936db68a58a3 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Wed, 23 Aug 2023 01:19:01 +0530 Subject: [PATCH 12/17] Added support of segmentation mask in RandomShear Layer (#2021) * seg mask support * format * add test * add demo * fix * update readme * 
review comment * fix --- .../segmentation/random_shear_demo.py | 34 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- keras_cv/layers/preprocessing/random_shear.py | 27 +++++++++++++++ .../layers/preprocessing/random_shear_test.py | 22 +++++++++++- 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 examples/layers/preprocessing/segmentation/random_shear_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_shear_demo.py b/examples/layers/preprocessing/segmentation/random_shear_demo.py new file mode 100644 index 0000000000..1a78a0c8fb --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_shear_demo.py @@ -0,0 +1,34 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_shear_demo.py shows how to use the RandomShear preprocessing layer. + +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomshear = preprocessing.RandomShear(0.5, 0.5) + ds = ds.map(randomshear, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index b3fef9e96c..4f077d8cec 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -37,7 +37,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomRotation | ✅ | ✅ | ✅ | ✅ | | RandomSaturation | ✅ | ✅ | ✅ | ✅ | | RandomSharpness | ✅ | ✅ | ✅ | ✅ | -| RandomShear | ✅ | ❌ | ✅ | ✅ | +| RandomShear | ✅ | ✅ | ✅ | ✅ | | RandomTranslation | ✅ | ✅ | ✅ | ✅ | | RandomZoom | ✅ | ❌ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py index 7c20a9f8d5..dc69288d70 100644 --- a/keras_cv/layers/preprocessing/random_shear.py +++ b/keras_cv/layers/preprocessing/random_shear.py @@ -219,6 +219,33 @@ def _build_shear_y_transform_matrix(shear_y): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + x, y = transformations["shear_x"], transformations["shear_y"] + + if x is not None: + transforms_x = self._build_shear_x_transform_matrix(x) + segmentation_masks = preprocessing.transform( + images=segmentation_masks, + transforms=transforms_x, + interpolation="nearest", + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + + if y is not None: + transforms_y = self._build_shear_y_transform_matrix(y) + segmentation_masks = preprocessing.transform( + images=segmentation_masks, + transforms=transforms_y, + interpolation="nearest", + fill_mode=self.fill_mode, + 
fill_value=self.fill_value, + ) + + return segmentation_masks + def augment_bounding_boxes( self, bounding_boxes, transformations, images=None, **kwargs ): diff --git a/keras_cv/layers/preprocessing/random_shear_test.py b/keras_cv/layers/preprocessing/random_shear_test.py index 180e6a16a7..51933b7f0b 100644 --- a/keras_cv/layers/preprocessing/random_shear_test.py +++ b/keras_cv/layers/preprocessing/random_shear_test.py @@ -27,19 +27,33 @@ def test_aggressive_shear_fills_at_least_some_pixels(self): [2 * tf.ones(img_shape), tf.ones(img_shape)], axis=0, ) + ys_segmentation_masks = tf.stack( + [2 * tf.ones(img_shape), tf.ones(img_shape)], + axis=0, + ) xs = tf.cast(xs, tf.float32) + ys_segmentation_masks = tf.cast(ys_segmentation_masks, tf.float32) fill_value = 0.0 layer = preprocessing.RandomShear( x_factor=(3, 3), seed=0, fill_mode="constant", fill_value=fill_value ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) # Some pixels should be replaced with fill value self.assertTrue(tf.math.reduce_any(xs[0] == fill_value)) self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) self.assertTrue(tf.math.reduce_any(xs[1] == fill_value)) self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) + self.assertTrue( + tf.math.reduce_any(ys_segmentation_masks[0] == fill_value) + ) + self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[0] == 2.0)) + self.assertTrue( + tf.math.reduce_any(ys_segmentation_masks[1] == fill_value) + ) + self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[1] == 1.0)) def test_return_shapes(self): """test return dict keys and value pairs""" @@ -55,6 +69,9 @@ def test_return_shapes(self): "classes": tf.random.uniform((2, 3), 0, 1), } + # randomly sample segmentation masks + ys_segmentation_masks = tf.ones((2, 512, 512, 3)) + layer = preprocessing.RandomShear( x_factor=(0.1, 0.3), y_factor=(0.1, 0.3), @@ -68,18 +85,21 @@ def test_return_shapes(self): "images": xs, "targets": ys_labels, "bounding_boxes": ys_bounding_boxes, + "segmentation_masks": ys_segmentation_masks, } ) - xs, ys_labels, ys_bounding_boxes = ( + xs, ys_labels, ys_bounding_boxes, ys_segmentation_masks = ( outputs["images"], outputs["targets"], outputs["bounding_boxes"], + outputs["segmentation_masks"], ) ys_bounding_boxes = bounding_box.to_dense(ys_bounding_boxes) self.assertEqual(xs.shape, [2, 512, 512, 3]) self.assertEqual(ys_labels.shape, [2, 10]) self.assertEqual(ys_bounding_boxes["boxes"].shape, [2, 3, 4]) self.assertEqual(ys_bounding_boxes["classes"].shape, [2, 3]) + self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3]) def test_single_image_input(self): """test for single image input""" From c740f81b59fbf3830c2f0c0131e84e872b4022f1 Mon Sep 17 00:00:00 2001 From: Aritra Roy Gosthipaty Date: Wed, 23 Aug 2023 01:46:19 +0530 Subject: [PATCH 13/17] [RandomZoom] Supporting Segmentation Masks (#2010) * chore: initial commit * chore: adding initial tests * update: readme for preprocessing * fix lint --- .../segmentation/random_zoom_demo.py | 33 +++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- keras_cv/layers/preprocessing/random_zoom.py | 25 +++++++++++++ .../layers/preprocessing/random_zoom_test.py | 37 ++++++++++++++----- 4 files changed, 86 insertions(+), 11 deletions(-) create mode 100644 examples/layers/preprocessing/segmentation/random_zoom_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_zoom_demo.py b/examples/layers/preprocessing/segmentation/random_zoom_demo.py new file mode 100644 index 0000000000..17a6f84536 --- /dev/null +++ 
b/examples/layers/preprocessing/segmentation/random_zoom_demo.py @@ -0,0 +1,33 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_zoom_demo.py shows how to use the RandomZoom preprocessing layer. +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomzoom = preprocessing.RandomZoom(0.5, 0.5) + ds = ds.map(randomzoom, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index 4f077d8cec..62f0b00f0d 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -39,7 +39,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomSharpness | ✅ | ✅ | ✅ | ✅ | | RandomShear | ✅ | ✅ | ✅ | ✅ | | RandomTranslation | ✅ | ✅ | ✅ | ✅ | -| RandomZoom | ✅ | ❌ | ❌ | ✅ | +| RandomZoom | ✅ | ✅ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | | Rescaling | ❌ | ✅ | ✅ | ✅ | | Resizing | ❌ | ✅ | ✅ | ❌ | diff --git a/keras_cv/layers/preprocessing/random_zoom.py b/keras_cv/layers/preprocessing/random_zoom.py index 98b08782bc..2e860da649 100644 --- a/keras_cv/layers/preprocessing/random_zoom.py +++ b/keras_cv/layers/preprocessing/random_zoom.py @@ -193,6 +193,31 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + segmentation_masks = preprocessing_utils.ensure_tensor( + segmentation_masks, self.compute_dtype + ) + original_shape = segmentation_masks.shape + mask_shape = tf.shape(segmentation_masks) + mask_hd = tf.cast(mask_shape[H_AXIS], tf.float32) + mask_wd = tf.cast(mask_shape[W_AXIS], tf.float32) + width_zooms = transformations["width_zooms"] + height_zooms = transformations["height_zooms"] + zooms = tf.cast( + tf.concat([width_zooms, height_zooms], axis=1), dtype=tf.float32 + ) + outputs = preprocessing_utils.transform( + segmentation_masks, + self.get_zoom_matrix(zooms, mask_hd, mask_wd), + fill_mode=self.fill_mode, + fill_value=self.fill_value, + interpolation="nearest", + ) + outputs.set_shape(original_shape) + return outputs + def get_zoom_matrix(self, zooms, image_height, image_width, name=None): """Returns projective transform(s) for the given zoom(s). 
diff --git a/keras_cv/layers/preprocessing/random_zoom_test.py b/keras_cv/layers/preprocessing/random_zoom_test.py index 219bc55779..0fdcf6eec3 100644 --- a/keras_cv/layers/preprocessing/random_zoom_test.py +++ b/keras_cv/layers/preprocessing/random_zoom_test.py @@ -35,20 +35,37 @@ def test_output_shapes(self, height_factor, width_factor): orig_height = 5 orig_width = 8 channels = 3 - input = tf.random.uniform( - shape=[num_samples, orig_height, orig_width, channels], - ) + input = { + "images": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, channels], + ), + "segmentation_masks": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, 1], + minval=0, + maxval=2, + ), + } layer = RandomZoom(height_factor, width_factor) actual_output = layer(input) - expected_output = tf.random.uniform( - shape=( - num_samples, - orig_height, - orig_width, - channels, + expected_output = { + "images": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, channels], ), + "segmentation_masks": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, 1], + minval=0, + maxval=2, + ), + } + # Check output shape of images + self.assertAllEqual( + expected_output["images"].shape, actual_output["images"].shape + ) + # Check output shape of segmentation masks + self.assertAllEqual( + expected_output["segmentation_masks"].shape, + actual_output["segmentation_masks"].shape, ) - self.assertAllEqual(expected_output.shape, actual_output.shape) def test_random_zoom_in_numeric(self): for dtype in (np.int64, np.float32): From d01aee44a73c228c1981ffd913fd7a96ad40b0c0 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Wed, 23 Aug 2023 02:11:44 +0530 Subject: [PATCH 14/17] Add support of segmentation mask in RandomCutout (#2004) * added support of segmentation-mask * added demo * add test * update readme * random cutout removed from mask * update test --- .../segmentation/random_cutout_demo.py | 34 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- .../layers/preprocessing/random_cutout.py | 5 +++ .../preprocessing/random_cutout_test.py | 6 ++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 examples/layers/preprocessing/segmentation/random_cutout_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_cutout_demo.py b/examples/layers/preprocessing/segmentation/random_cutout_demo.py new file mode 100644 index 0000000000..71c3631b9b --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_cutout_demo.py @@ -0,0 +1,34 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_cutout_demo.py shows how to use the RandomCutout preprocessing layer. + +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. 
+""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomcutout = preprocessing.RandomCutout(0.5, 0.5) + ds = ds.map(randomcutout, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index 62f0b00f0d..2e4eedf2fd 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -29,7 +29,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomContrast | ✅ | ✅ | ✅ | ✅ | | RandomCropAndResize | ❌ | ✅ | ✅ | ❌ | | RandomCrop | ✅ | ❌ | ✅ | ✅ | -| RandomCutout | ❌ | ❌ | ❌ | ✅ | +| RandomCutout | ❌ | ✅ | ❌ | ✅ | | RandomFlip | ✅ | ✅ | ✅ | ✅ | | RandomGaussianBlur | ❌ | ✅ | ✅ | ✅ | | RandomHue | ✅ | ✅ | ✅ | ✅ | diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py index 24aed8c455..4eb4bb4a24 100644 --- a/keras_cv/layers/preprocessing/random_cutout.py +++ b/keras_cv/layers/preprocessing/random_cutout.py @@ -120,6 +120,11 @@ def augment_image(self, image, transformation=None, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label + def augment_segmentation_mask( + self, segmentation_masks, transformation=None, **kwargs + ): + return segmentation_masks + def _compute_rectangle_position(self, inputs): input_shape = tf.shape(inputs) image_height, image_width = ( diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py index 818486749b..14930b6fd1 100644 --- a/keras_cv/layers/preprocessing/random_cutout_test.py +++ b/keras_cv/layers/preprocessing/random_cutout_test.py @@ -45,23 +45,29 @@ def _run_test(self, height_factor, width_factor): def test_return_shapes(self): xs = np.ones((2, 512, 512, 3)) + ys_segmentation_masks = np.ones((2, 512, 512, 3)) layer = preprocessing.RandomCutout( height_factor=0.5, width_factor=0.5, seed=1 ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) self.assertEqual(xs.shape, [2, 512, 512, 3]) + self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3]) def test_return_shapes_single_element(self): xs = np.ones((512, 512, 3)) + ys_segmentation_masks = np.ones((512, 512, 3)) layer = preprocessing.RandomCutout( height_factor=0.5, width_factor=0.5, seed=1 ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) self.assertEqual(xs.shape, [512, 512, 3]) + self.assertEqual(ys_segmentation_masks.shape, [512, 512, 3]) def test_random_cutout_single_float(self): self._run_test(0.5, 0.5) From b038f583164fdc950acfffb599fa57f77057cc87 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Thu, 24 Aug 2023 14:34:20 -0600 Subject: [PATCH 15/17] Remove forward slashes from layer names for backbones (#2037) --- .../backbones/densenet/densenet_backbone.py | 6 +++--- .../mobilenet_v3/mobilenet_v3_backbone.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/keras_cv/models/backbones/densenet/densenet_backbone.py b/keras_cv/models/backbones/densenet/densenet_backbone.py index 98b9eea0e5..28109b64fa 100644 --- a/keras_cv/models/backbones/densenet/densenet_backbone.py +++ b/keras_cv/models/backbones/densenet/densenet_backbone.py @@ -88,12 +88,12 @@ def __init__( x = keras.layers.Rescaling(1 / 255.0)(x) 
x = keras.layers.Conv2D( - 64, 7, strides=2, use_bias=False, padding="same", name="conv1/conv" + 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv" )(x) x = keras.layers.BatchNormalization( - axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1/bn" + axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1_bn" )(x) - x = keras.layers.Activation("relu", name="conv1/relu")(x) + x = keras.layers.Activation("relu", name="conv1_relu")(x) x = keras.layers.MaxPooling2D( 3, strides=2, padding="same", name="pool1" )(x) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py index f92d177e99..bd033c282f 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py @@ -126,7 +126,7 @@ def __init__( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv/BatchNorm", + name="Conv_BatchNorm", )(x) x = apply_hard_swish(x) @@ -161,7 +161,7 @@ def __init__( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv_1/BatchNorm", + name="Conv_1_BatchNorm", )(x) x = apply_hard_swish(x) @@ -291,11 +291,11 @@ def apply_inverted_res_block( activation = keras.activations.get(activation) shortcut = x - prefix = "expanded_conv/" + prefix = "expanded_conv_" infilters = x.shape[CHANNEL_AXIS] if expansion_index > 0: - prefix = f"expanded_conv_{expansion_index}/" + prefix = f"expanded_conv_{expansion_index}_" x = keras.layers.Conv2D( adjust_channels(infilters * expansion), @@ -308,14 +308,14 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "expand/BatchNorm", + name=prefix + "expand_BatchNorm", )(x) x = activation(x) if stride == 2: x = keras.layers.ZeroPadding2D( padding=utils.correct_pad_downsample(x, kernel_size), - name=prefix + "depthwise/pad", + name=prefix + "depthwise_pad", )(x) x = keras.layers.DepthwiseConv2D( @@ -329,7 +329,7 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "depthwise/BatchNorm", + name=prefix + "depthwise_BatchNorm", )(x) x = activation(x) @@ -353,7 +353,7 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "project/BatchNorm", + name=prefix + "project_BatchNorm", )(x) if stride == 1 and infilters == filters: From ab812d1b4d6414fac0100238c982472dc5d6e8c6 Mon Sep 17 00:00:00 2001 From: David Landup <60978046+DavidLandup0@users.noreply.github.com> Date: Fri, 25 Aug 2023 00:56:39 +0200 Subject: [PATCH 16/17] [DeepVision Port] SegFormer and Mix-Transformers (#1946) * initial dump * add all basic layers, port roughly to keras core ops * updated .gitignore * segformer head and formatting * cleanup * remove tf call * remove tf * migrating to more keras ops * cleanups and fixes * fix reshaping * comments * from presets api, keras.ops -> ops * embed_dims -> embedding_dims * addressing some PR comments * docstrings, argument update * depths arg * sync * compute output shapes * segformer progress * head * softmax * remove softmax * undo compute_output_shapes() * efficientmultiheadattention -> segformermultiheadattention * docstrings * softmax output * segformer presets * updating segformer presets * segformer presets * import aliases * refactoring * pr comments * pr comments * add aliases * aliases ot init * refactor fix * import keras_cv_export * fix presets/aliases and add copyright * linter warnings * linter errors * 
consistency in presets * return config * fix serialization * Some cleanup + more tests * Fix DropPath layer (need to update tests + add shim for tf.keras * Finish DropPath layer * Use static shape in backbone * Formatting * Switch back to ops.shape * documentation * documentation * remove default num classes * fix docs --------- Co-authored-by: ianjjohnson <3072903+ianstenbit@users.noreply.github.com> --- .gitignore | 1 + keras_cv/backend/__init__.py | 1 + keras_cv/backend/random.py | 20 ++ keras_cv/layers/__init__.py | 9 + .../hierarchical_transformer_encoder.py | 140 ++++++++++ .../layers/overlapping_patching_embedding.py | 85 ++++++ keras_cv/layers/regularization/drop_path.py | 20 +- .../layers/regularization/drop_path_test.py | 18 +- .../layers/segformer_multihead_attention.py | 132 +++++++++ keras_cv/models/__init__.py | 28 ++ .../backbones/mix_transformer/__init__.py | 13 + .../mix_transformer_aliases.py | 262 ++++++++++++++++++ .../mix_transformer_backbone.py | 188 +++++++++++++ .../mix_transformer_backbone_presets.py | 153 ++++++++++ .../mix_transformer_backbone_presets_test.py | 100 +++++++ .../mix_transformer_backbone_test.py | 69 +++++ keras_cv/models/segmentation/__init__.py | 1 + .../models/segmentation/segformer/__init__.py | 15 + .../segmentation/segformer/segformer.py | 175 ++++++++++++ .../segformer/segformer_aliases.py | 244 ++++++++++++++++ .../segformer/segformer_presets.py | 105 +++++++ .../segmentation/segformer/segformer_test.py | 92 ++++++ 22 files changed, 1855 insertions(+), 16 deletions(-) create mode 100644 keras_cv/backend/random.py create mode 100644 keras_cv/layers/hierarchical_transformer_encoder.py create mode 100644 keras_cv/layers/overlapping_patching_embedding.py create mode 100644 keras_cv/layers/segformer_multihead_attention.py create mode 100644 keras_cv/models/backbones/mix_transformer/__init__.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py create mode 100644 keras_cv/models/segmentation/segformer/__init__.py create mode 100644 keras_cv/models/segmentation/segformer/segformer.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_aliases.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_presets.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_test.py diff --git a/.gitignore b/.gitignore index 6a59b32803..68d68189bd 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ __pycache__/ .vscode/ .devcontainer/ .coverage +.history diff --git a/keras_cv/backend/__init__.py b/keras_cv/backend/__init__.py index da703722b9..7440acbd38 100644 --- a/keras_cv/backend/__init__.py +++ b/keras_cv/backend/__init__.py @@ -76,6 +76,7 @@ from keras_cv.backend import config # noqa: E402 from keras_cv.backend import ops # noqa: E402 +from keras_cv.backend import random # noqa: E402 from keras_cv.backend import tf_ops # noqa: E402 diff --git a/keras_cv/backend/random.py b/keras_cv/backend/random.py new file mode 100644 index 0000000000..21d4b08c7d --- /dev/null +++ b/keras_cv/backend/random.py @@ -0,0 +1,20 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.backend.config import multi_backend + +if multi_backend(): + from keras_core.random import * # noqa: F403, F401 +else: + from keras_core.src.backend.tensorflow.random import * # noqa: F403, F401 diff --git a/keras_cv/layers/__init__.py b/keras_cv/layers/__init__.py index c8b01f2769..342a942f64 100644 --- a/keras_cv/layers/__init__.py +++ b/keras_cv/layers/__init__.py @@ -19,6 +19,9 @@ from keras_cv.layers.augmenter import Augmenter from keras_cv.layers.feature_pyramid import FeaturePyramid from keras_cv.layers.fusedmbconv import FusedMBConvBlock +from keras_cv.layers.hierarchical_transformer_encoder import ( + HierarchicalTransformerEncoder, +) from keras_cv.layers.mbconv import MBConvBlock from keras_cv.layers.object_detection.anchor_generator import AnchorGenerator from keras_cv.layers.object_detection.box_matcher import BoxMatcher @@ -32,6 +35,9 @@ CenterNetLabelEncoder, ) from keras_cv.layers.object_detection_3d.voxelization import DynamicVoxelization +from keras_cv.layers.overlapping_patching_embedding import ( + OverlappingPatchingAndEmbedding, +) from keras_cv.layers.preprocessing.aug_mix import AugMix from keras_cv.layers.preprocessing.auto_contrast import AutoContrast from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( @@ -124,6 +130,9 @@ from keras_cv.layers.regularization.dropblock_2d import DropBlock2D from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D from keras_cv.layers.regularization.stochastic_depth import StochasticDepth +from keras_cv.layers.segformer_multihead_attention import ( + SegFormerMultiheadAttention, +) from keras_cv.layers.spatial_pyramid import SpatialPyramidPooling from keras_cv.layers.transformer_encoder import TransformerEncoder from keras_cv.layers.vit_layers import PatchingAndEmbedding diff --git a/keras_cv/layers/hierarchical_transformer_encoder.py b/keras_cv/layers/hierarchical_transformer_encoder.py new file mode 100644 index 0000000000..ee67a17b56 --- /dev/null +++ b/keras_cv/layers/hierarchical_transformer_encoder.py @@ -0,0 +1,140 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.layers.regularization.drop_path import DropPath +from keras_cv.layers.segformer_multihead_attention import ( + SegFormerMultiheadAttention, +) + + +@keras_cv_export("keras_cv.layers.HierarchicalTransformerEncoder") +class HierarchicalTransformerEncoder(keras.layers.Layer): + """ + Hierarchical transformer encoder block implementation as a Keras Layer. + The layer uses `SegFormerMultiheadAttention` as a `MultiHeadAttention` + alternative for computational efficiency, and is meant to be used + within the SegFormer architecture. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection of the + encoder, and output of the `SegFormerMultiheadAttention` layer. + Due to the residual addition the input dimensionality has to be + equal to the output dimensionality. + num_heads: integer, the number of heads for the + `SegFormerMultiheadAttention` layer. + drop_prob: float, the probability of dropping a random + sample using the `DropPath` layer. Defaults to `0.0`. + layer_norm_epsilon: float, the epsilon for + `LayerNormalization` layers. Defaults to `1e-06` + sr_ratio: integer, the ratio to use within + `SegFormerMultiheadAttention`. If set to > 1, a `Conv2D` + layer is used to reduce the length of the sequence. Defaults to `1`. 
+ + Basic usage: + + ``` + project_dim = 1024 + num_heads = 4 + patch_size = 16 + + encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding( + project_dim=project_dim, patch_size=patch_size)(img_batch) + + trans_encoded = keras_cv.layers.HierarchicalTransformerEncoder(project_dim=project_dim, + num_heads=num_heads, + sr_ratio=1)(encoded_patches) + + print(trans_encoded.shape) # (1, 3136, 1024) + ``` + """ + + def __init__( + self, + project_dim, + num_heads, + sr_ratio=1, + drop_prob=0.0, + layer_norm_epsilon=1e-6, + **kwargs, + ): + super().__init__(**kwargs) + self.project_dim = project_dim + self.num_heads = num_heads + self.drop_prop = drop_prob + + self.norm1 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon) + self.attn = SegFormerMultiheadAttention( + project_dim, num_heads, sr_ratio + ) + self.drop_path = DropPath(drop_prob) + self.norm2 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon) + self.mlp = self.MixFFN( + channels=project_dim, + mid_channels=int(project_dim * 4), + ) + + def build(self, input_shape): + super().build(input_shape) + self.H = ops.sqrt(ops.cast(input_shape[1], "float32")) + self.W = ops.sqrt(ops.cast(input_shape[2], "float32")) + + def call(self, x): + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + def get_config(self): + config = super().get_config() + config.update( + { + "mlp": keras.saving.serialize_keras_object(self.mlp), + "project_dim": self.project_dim, + "num_heads": self.num_heads, + "drop_prop": self.drop_prop, + } + ) + return config + + class MixFFN(keras.layers.Layer): + def __init__(self, channels, mid_channels): + super().__init__() + self.fc1 = keras.layers.Dense(mid_channels) + self.dwconv = keras.layers.DepthwiseConv2D( + kernel_size=3, + strides=1, + padding="same", + ) + self.fc2 = keras.layers.Dense(channels) + + def call(self, x): + x = self.fc1(x) + shape = ops.shape(x) + H, W = int(math.sqrt(shape[1])), int(math.sqrt(shape[1])) + B, C = shape[0], shape[2] + x = ops.reshape(x, (B, H, W, C)) + x = self.dwconv(x) + x = ops.reshape(x, (B, -1, C)) + x = ops.nn.gelu(x) + x = self.fc2(x) + return x diff --git a/keras_cv/layers/overlapping_patching_embedding.py b/keras_cv/layers/overlapping_patching_embedding.py new file mode 100644 index 0000000000..69060087ec --- /dev/null +++ b/keras_cv/layers/overlapping_patching_embedding.py @@ -0,0 +1,85 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops + + +@keras_cv_export("keras_cv.layers.OverlappingPatchingAndEmbedding") +class OverlappingPatchingAndEmbedding(keras.layers.Layer): + def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs): + """ + Overlapping Patching and Embedding layer. Differs from `PatchingAndEmbedding` + in that the patch size does not affect the sequence length. It's fully derived + from the `stride` parameter. 
Additionally, no positional embedding is done + as part of the layer - only a projection using a `Conv2D` layer. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection. + Defaults to `32`. + patch_size: integer, the size of the patches to encode. + Defaults to `7`. + stride: integer, the stride to use for the patching before + projection. Defaults to `5`. + + Basic usage: + + ``` + project_dim = 1024 + patch_size = 16 + + encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding( + project_dim=project_dim, patch_size=patch_size)(img_batch) + + print(encoded_patches.shape) # (1, 3136, 1024) + ``` + """ + super().__init__(**kwargs) + + self.project_dim = project_dim + self.patch_size = patch_size + self.stride = stride + + self.proj = keras.layers.Conv2D( + filters=project_dim, + kernel_size=patch_size, + strides=stride, + padding="same", + ) + self.norm = keras.layers.LayerNormalization() + + def call(self, x): + x = self.proj(x) + # B, H, W, C + shape = x.shape + x = ops.reshape(x, (-1, shape[1] * shape[2], shape[3])) + x = self.norm(x) + return x + + def get_config(self): + config = super().get_config() + config.update( + { + "project_dim": self.project_dim, + "patch_size": self.patch_size, + "stride": self.stride, + } + ) + return config diff --git a/keras_cv/layers/regularization/drop_path.py b/keras_cv/layers/regularization/drop_path.py index e254f29493..4475e2365f 100644 --- a/keras_cv/layers/regularization/drop_path.py +++ b/keras_cv/layers/regularization/drop_path.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from tensorflow import keras - from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.backend import random @keras_cv_export("keras_cv.layers.DropPath") -class DropPath(keras.__internal__.layers.BaseRandomLayer): +class DropPath(keras.layers.Layer): """ Implements the DropPath layer. DropPath randomly drops samples during training with a probability of `rate`. 
Note that this layer drops individual @@ -47,7 +48,7 @@ class DropPath(keras.__internal__.layers.BaseRandomLayer): """ # noqa: E501 def __init__(self, rate=0.5, seed=None, **kwargs): - super().__init__(seed=seed, **kwargs) + super().__init__(**kwargs) self.rate = rate self.seed = seed @@ -55,12 +56,13 @@ def call(self, x, training=None): if self.rate == 0.0 or not training: return x else: - keep_prob = 1 - self.rate - drop_map_shape = (x.shape[0],) + (1,) * (len(x.shape) - 1) - drop_map = keras.backend.random_bernoulli( - drop_map_shape, p=keep_prob, seed=self.seed + batch_size = x.shape[0] or ops.shape(x)[0] + drop_map_shape = (batch_size,) + (1,) * (len(x.shape) - 1) + drop_map = ops.cast( + random.uniform(drop_map_shape, seed=self.seed) > self.rate, + x.dtype, ) - x = x / keep_prob + x = x / (1.0 - self.rate) x = x * drop_map return x diff --git a/keras_cv/layers/regularization/drop_path_test.py b/keras_cv/layers/regularization/drop_path_test.py index 22f63b5223..00b4b790f0 100644 --- a/keras_cv/layers/regularization/drop_path_test.py +++ b/keras_cv/layers/regularization/drop_path_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np +import pytest import tensorflow as tf from keras_cv.layers import DropPath @@ -23,7 +25,7 @@ class DropPathTest(TestCase): def test_input_unchanged_in_eval_mode(self): layer = DropPath(rate=0.5, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=False) @@ -31,7 +33,7 @@ def test_input_unchanged_in_eval_mode(self): def test_input_unchanged_with_rate_equal_to_zero(self): layer = DropPath(rate=0, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=True) @@ -39,7 +41,7 @@ def test_input_unchanged_with_rate_equal_to_zero(self): def test_input_gets_partially_zeroed_out_in_train_mode(self): layer = DropPath(rate=0.2, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=True) @@ -48,9 +50,11 @@ def test_input_gets_partially_zeroed_out_in_train_mode(self): self.assertGreaterEqual(non_zeros_inputs, non_zeros_outputs) + # Because randomness is inconsistent across backends, we just test with 1. + @pytest.mark.tf_keras_only def test_strict_input_gets_partially_zeroed_out_in_train_mode(self): - layer = DropPath(rate=0.5, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + layer = DropPath(rate=0.5, seed=10) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) total_non_zero_inputs = 0 total_non_zero_outputs = 0 @@ -66,6 +70,6 @@ def test_strict_input_gets_partially_zeroed_out_in_train_mode(self): self.assertAllInRange( total_non_zero_outputs, - int(0.49 * tf.cast(total_non_zero_inputs, tf.float32)), - int(0.51 * tf.cast(total_non_zero_inputs, tf.float32)), + int(0.40 * tf.cast(total_non_zero_inputs, tf.float32)), + int(0.60 * tf.cast(total_non_zero_inputs, tf.float32)), ) diff --git a/keras_cv/layers/segformer_multihead_attention.py b/keras_cv/layers/segformer_multihead_attention.py new file mode 100644 index 0000000000..203773d4ea --- /dev/null +++ b/keras_cv/layers/segformer_multihead_attention.py @@ -0,0 +1,132 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops + + +@keras_cv_export("keras_cv.layers.SegFormerMultiheadAttention") +class SegFormerMultiheadAttention(keras.layers.Layer): + def __init__(self, project_dim, num_heads, sr_ratio): + """ + Efficient MultiHeadAttention implementation as a Keras layer. + A huge bottleneck in scaling transformers is the self-attention layer + with an O(n^2) complexity. + + SegFormerMultiheadAttention performs a sequence reduction (SR) operation + with a given ratio, to reduce the sequence length before performing key and value projections, + reducing the O(n^2) complexity to O(n^2/R) where R is the sequence reduction ratio. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [NVlabs' official implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [@sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/efficient_attention.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection + of the `SegFormerMultiheadAttention` layer. + num_heads: integer, the number of heads to use in the + attention computation. + sr_ratio: integer, the sequence reduction ratio to perform + on the sequence before key and value projections. 
+ + Basic usage: + + ``` + tensor = tf.random.uniform([1, 196, 32]) + output = keras_cv.layers.SegFormerMultiheadAttention(project_dim=768, + num_heads=2, + sr_ratio=4)(tensor) + print(output.shape) # (1, 196, 32) + ``` + """ + super().__init__() + self.num_heads = num_heads + self.sr_ratio = sr_ratio + self.scale = (project_dim // num_heads) ** -0.5 + self.q = keras.layers.Dense(project_dim) + self.k = keras.layers.Dense(project_dim) + self.v = keras.layers.Dense(project_dim) + self.proj = keras.layers.Dense(project_dim) + + if sr_ratio > 1: + self.sr = keras.layers.Conv2D( + filters=project_dim, + kernel_size=sr_ratio, + strides=sr_ratio, + padding="same", + ) + self.norm = keras.layers.LayerNormalization() + + def call(self, x): + input_shape = ops.shape(x) + H, W = int(math.sqrt(input_shape[1])), int(math.sqrt(input_shape[1])) + B, C = input_shape[0], input_shape[2] + + q = self.q(x) + q = ops.reshape( + q, + ( + input_shape[0], + input_shape[1], + self.num_heads, + input_shape[2] // self.num_heads, + ), + ) + q = ops.transpose(q, [0, 2, 1, 3]) + + if self.sr_ratio > 1: + x = ops.reshape( + ops.transpose(x, [0, 2, 1]), + (B, H, W, C), + ) + x = self.sr(x) + x = ops.reshape(x, [input_shape[0], input_shape[2], -1]) + x = ops.transpose(x, [0, 2, 1]) + x = self.norm(x) + + k = self.k(x) + v = self.v(x) + + k = ops.transpose( + ops.reshape( + k, + [B, -1, self.num_heads, C // self.num_heads], + ), + [0, 2, 1, 3], + ) + + v = ops.transpose( + ops.reshape( + v, + [B, -1, self.num_heads, C // self.num_heads], + ), + [0, 2, 1, 3], + ) + + attn = (q @ ops.transpose(k, [0, 1, 3, 2])) * self.scale + attn = ops.nn.softmax(attn, axis=-1) + + attn = attn @ v + attn = ops.reshape( + ops.transpose(attn, [0, 2, 1, 3]), + [input_shape[0], input_shape[1], input_shape[2]], + ) + + x = self.proj(attn) + return x diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 4191c07575..9c83a3891a 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -112,6 +112,27 @@ from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2SBackbone, ) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB1Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB2Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB3Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB4Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB5Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTBackbone, +) from keras_cv.models.backbones.mobilenet_v3.mobilenet_v3_aliases import ( MobileNetV3LargeBackbone, ) @@ -166,5 +187,12 @@ YOLOV8Detector, ) from keras_cv.models.segmentation import DeepLabV3Plus +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormer +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB0 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB1 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB2 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB3 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB4 +from 
keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB5 from keras_cv.models.stable_diffusion import StableDiffusion from keras_cv.models.stable_diffusion import StableDiffusionV2 diff --git a/keras_cv/models/backbones/mix_transformer/__init__.py b/keras_cv/models/backbones/mix_transformer/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py new file mode 100644 index 0000000000..7c7ea6a8b6 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py @@ -0,0 +1,262 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """MiT model. + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + include_rescaling: bool, whether to rescale the inputs. If set to + True, inputs will be passed through a `Rescaling(scale=1 / 255)` + layer. Defaults to True. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e., output of `layers.Input()`) + to use as image input for the model. 
+ + Examples: + ```python + input_data = tf.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone + model = {name}Backbone() + output = model(input_data) + ``` +""" # noqa: E501 + + +class MiTB0Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b0", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "mit_b0_imagenet": copy.deepcopy( + backbone_presets["mit_b0_imagenet"] + ), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +class MiTB1Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b1", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB2Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b2", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB3Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b3", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB4Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b4", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB5Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } 
+ ) + return MiTBackbone.from_preset("mit_b5", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +setattr( + MiTB0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB0"), +) + +setattr( + MiTB1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB1"), +) + +setattr( + MiTB2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB2"), +) + +setattr( + MiTB3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB3"), +) + +setattr( + MiTB4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB4"), +) + +setattr( + MiTB5Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB5"), +) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py new file mode 100644 index 0000000000..bf6a1a6ec2 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py @@ -0,0 +1,188 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MiT backbone model. + +References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/models/classification/mix_transformer/mit_tf.py) + - [Based on the NVlabs' official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) + - [Inspired by @sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py) +""" # noqa: E501 + +import copy + +import numpy as np + +from keras_cv import layers as cv_layers +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models import utils +from keras_cv.models.backbones.backbone import Backbone +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets_with_weights, +) +from keras_cv.utils.python_utils import classproperty + + +@keras_cv_export("keras_cv.models.MiTBackbone") +class MiTBackbone(Backbone): + def __init__( + self, + include_rescaling, + depths, + input_shape=(224, 224, 3), + input_tensor=None, + embedding_dims=None, + **kwargs, + ): + """A Keras model implementing the MixTransformer architecture to be + used as a backbone for the SegFormer architecture. 
+ + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/classification/mix_transformer) # noqa: E501 + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + depths: the number of transformer encoders to be used per stage in the + network + embedding_dims: the embedding dims per hierarchical stage, used as + the levels of the feature pyramid + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) + to use as image input for the model. + + Examples: + + Using the class with a `backbone`: + + ```python + import tensorflow as tf + import keras_cv + + images = np.ones(shape=(1, 96, 96, 3)) + labels = np.zeros(shape=(1, 96, 96, 1)) + backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") + + # Evaluate model + model(images) + + # Train model + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(from_logits=False), + metrics=["accuracy"], + ) + model.fit(images, labels, epochs=3) + ``` + """ + drop_path_rate = 0.1 + dpr = [x for x in np.linspace(0.0, drop_path_rate, sum(depths))] + blockwise_num_heads = [1, 2, 5, 8] + blockwise_sr_ratios = [8, 4, 2, 1] + num_stages = 4 + + cur = 0 + patch_embedding_layers = [] + transformer_blocks = [] + layer_norms = [] + + for i in range(num_stages): + patch_embed_layer = cv_layers.OverlappingPatchingAndEmbedding( + project_dim=embedding_dims[0] if i == 0 else embedding_dims[i], + patch_size=7 if i == 0 else 3, + stride=4 if i == 0 else 2, + name=f"patch_and_embed_{i}", + ) + patch_embedding_layers.append(patch_embed_layer) + + transformer_block = [ + cv_layers.HierarchicalTransformerEncoder( + project_dim=embedding_dims[i], + num_heads=blockwise_num_heads[i], + sr_ratio=blockwise_sr_ratios[i], + drop_prob=dpr[cur + k], + name=f"hierarchical_encoder_{i}_{k}", + ) + for k in range(depths[i]) + ] + transformer_blocks.append(transformer_block) + cur += depths[i] + layer_norms.append(keras.layers.LayerNormalization()) + + inputs = utils.parse_model_inputs(input_shape, input_tensor) + x = inputs + + if include_rescaling: + x = keras.layers.Rescaling(scale=1 / 255)(x) + + pyramid_level_inputs = [] + for i in range(num_stages): + # Compute new height/width after the `proj` + # call in `OverlappingPatchingAndEmbedding` + stride = 4 if i == 0 else 2 + new_height, new_width = ( + int(ops.shape(x)[1] / stride), + int(ops.shape(x)[2] / stride), + ) + + x = patch_embedding_layers[i](x) + for blk in transformer_blocks[i]: + x = blk(x) + x = layer_norms[i](x) + x = keras.layers.Reshape( + (new_height, new_width, -1), name=f"output_level_{i}" + )(x) + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + super().__init__(inputs=inputs, outputs=x, **kwargs) + + self.depths = depths + self.embedding_dims = embedding_dims + self.include_rescaling = include_rescaling + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + + def get_config(self): + config = super().get_config() + config.update( + { + "depths": self.depths, + "embedding_dims": self.embedding_dims, + "include_rescaling": self.include_rescaling, + "input_shape": self.input_shape[1:], + 
"input_tensor": self.input_tensor, + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return copy.deepcopy(backbone_presets_with_weights) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py new file mode 100644 index 0000000000..a4c1c2a3e1 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py @@ -0,0 +1,153 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MiT model preset configurations.""" + +backbone_presets_no_weights = { + "mit_b0": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [32, 64, 160, 256], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b1": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 13156554, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b2": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 16 transformer blocks." + ), + "params": 24201418, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 4, 6, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b3": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 28 transformer blocks." + ), + "params": 44077258, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 4, 18, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b4": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 41 transformer blocks." + ), + "params": 60847818, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 8, 27, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b5": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 52 transformer blocks." 
+ ), + "params": 81448138, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 6, 40, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, +} + +backbone_presets_with_weights = { + "mit_b0_imagenet": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks. Pre-trained on ImageNet-1K and scores 69% top-1 accuracy on the validation set." # noqa: E501 + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [32, 64, 160, 256], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + "weights_url": "https://storage.googleapis.com/keras-cv/models/mitb0/imagenet/classification-v0.h5", # noqa: E501 + "weights_hash": "8e0c416cd330b6fa0bcfb3a5ccc43edcbcabf6a463aee3c2a9b6a1398c207d10", # noqa: E501 + }, +} + +backbone_presets = { + **backbone_presets_no_weights, + **backbone_presets_with_weights, +} diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py new file mode 100644 index 0000000000..0bc443ee92 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py @@ -0,0 +1,100 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for loading pretrained model presets.""" + +import numpy as np +import pytest + +from keras_cv.backend import ops +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.tests.test_case import TestCase + + +@pytest.mark.large +class MixTransformerPresetSmokeTest(TestCase): + """ + A smoke test for MixTransformer presets we run continuously. + This only tests the smallest weights we have available. Run with: + `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_large` # noqa: E501 + """ + + def setUp(self): + self.input_batch = np.ones(shape=(2, 224, 224, 3)) + + def test_backbone_output(self): + model = MiTBackbone.from_preset("mit_b0") + model(self.input_batch) + + def test_backbone_output_with_weights(self): + model = MiTBackbone.from_preset("mit_b0_imagenet") + + # The forward pass from a preset should be stable! + # This test should catch cases where we unintentionally change our + # network code in a way that would invalidate our preset weights. + # We should only update these numbers if we are updating a weights + # file, or have found a discrepancy with the upstream source. 
+ + outputs = model(np.ones(shape=(1, 224, 224, 3))) + expected = [-0.603472, -0.180627, -1.92137, -0.004339, 2.396384] + # Keep a high tolerance, so we are robust to different hardware. + self.assertAllClose( + ops.convert_to_numpy(outputs[0, 0, 0, :5]), + expected, + atol=0.01, + rtol=0.01, + ) + + def test_applications_model_output(self): + model = MiTB0Backbone() + model(self.input_batch) + + def test_applications_model_output_with_preset(self): + model = MiTB0Backbone.from_preset("mit_b0_imagenet") + model(self.input_batch) + + def test_preset_docstring(self): + """Check we did our docstring formatting correctly.""" + for name in MiTBackbone.presets: + self.assertRegex(MiTBackbone.from_preset.__doc__, name) + + def test_unknown_preset_error(self): + # Not a preset name + with self.assertRaises(ValueError): + MiTBackbone.from_preset("mit_b0_clowntown") + + def test_load_weights_error(self): + # Try to load weights when none available + with self.assertRaises(ValueError): + MiTBackbone.from_preset("mit_b0", load_weights=True) + + +@pytest.mark.extra_large +class MixTransformerPresetFullTest(TestCase): + """ + Test the full enumeration of our preset. + This tests every preset for Mix Transformer and is only run manually. + Run with: + `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_extra_large` # noqa: E501 + """ + + def test_load_mix_transformer(self): + input_data = np.ones(shape=(2, 224, 224, 3)) + for preset in MiTBackbone.presets: + model = MiTBackbone.from_preset(preset) + model(input_data) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py new file mode 100644 index 0000000000..f24596bdfe --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py @@ -0,0 +1,69 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.tests.test_case import TestCase + + +class MixTransformerBackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(2, 224, 224, 3)) + + def test_valid_call(self): + model = MiTB0Backbone() + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = MiTB0Backbone( + include_rescaling=False, + ) + model_output = model(self.input_batch) + save_path = os.path.join(self.get_temp_dir(), "mit_backbone.keras") + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. 
+ self.assertIsInstance(restored_model, MiTBackbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose( + ops.convert_to_numpy(model_output), + ops.convert_to_numpy(restored_output), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = MiTB0Backbone( + input_shape=(224, 224, num_channels), + include_rescaling=False, + ) + self.assertEqual(model.output_shape, (None, 7, 7, 256)) diff --git a/keras_cv/models/segmentation/__init__.py b/keras_cv/models/segmentation/__init__.py index 122dc4191e..f25ee4ea7c 100644 --- a/keras_cv/models/segmentation/__init__.py +++ b/keras_cv/models/segmentation/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. from keras_cv.models.segmentation.deeplab_v3_plus import DeepLabV3Plus +from keras_cv.models.segmentation.segformer import SegFormer diff --git a/keras_cv/models/segmentation/segformer/__init__.py b/keras_cv/models/segmentation/segformer/__init__.py new file mode 100644 index 0000000000..59d29582c2 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.segmentation.segformer.segformer import SegFormer diff --git a/keras_cv/models/segmentation/segformer/segformer.py b/keras_cv/models/segmentation/segformer/segformer.py new file mode 100644 index 0000000000..0985b13749 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer.py @@ -0,0 +1,175 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets, +) +from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets_with_weights, +) +from keras_cv.models.task import Task +from keras_cv.utils.python_utils import classproperty +from keras_cv.utils.train import get_feature_extractor + + +@keras_cv_export("keras_cv.models.segmentation.SegFormer") +class SegFormer(Task): + """A Keras model implementing the SegFormer architecture for semantic + segmentation. 
+
+    References:
+        - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501
+        - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501
+
+    Args:
+        backbone: `keras.Model`. The backbone network for the model that is
+            used as a feature extractor for the SegFormer encoder.
+            It is *intended* to be used only with the MiT backbone model which
+            was created specifically for SegFormers. It should either be a
+            `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model`
+            that implements the `pyramid_level_inputs` property with keys
+            "P2", "P3", "P4", and "P5" and layer names as
+            values.
+        num_classes: int, the number of classes for the segmentation model,
+            including the background class.
+        projection_filters: int, number of filters in the
+            convolution layer projecting the concatenated features into
+            a segmentation map. Defaults to `256`.
+
+    Examples:
+
+    Using the class with a `backbone`:
+
+    ```python
+    import numpy as np
+    import tensorflow as tf
+    import keras_cv
+
+    images = np.ones(shape=(1, 96, 96, 3))
+    labels = np.zeros(shape=(1, 96, 96, 1))
+    backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+    model = keras_cv.models.segmentation.SegFormer(
+        num_classes=1, backbone=backbone,
+    )
+
+    # Evaluate model
+    model(images)
+
+    # Train model
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
+        metrics=["accuracy"],
+    )
+    model.fit(images, labels, epochs=3)
+    ```
+    """
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        projection_filters=256,
+        **kwargs,
+    ):
+        if not isinstance(backbone, keras.layers.Layer) or not isinstance(
+            backbone, keras.Model
+        ):
+            raise ValueError(
+                "Argument `backbone` must be a `keras.layers.Layer` instance "
+                "or `keras.Model`. Received instead: "
+                f"backbone={backbone} (of type {type(backbone)})."
+ ) + + inputs = backbone.input + + feature_extractor = get_feature_extractor( + backbone, list(backbone.pyramid_level_inputs.values()) + ) + # Multi-level dictionary + features = list(feature_extractor(inputs).values()) + + # Get H and W of level one output + _, H, W, _ = features[0].shape + # Project all multi-level outputs onto the same dimensionality + # and feature map shape + multi_layer_outs = [] + for feature_dim, feature in zip(backbone.embedding_dims, features): + out = keras.layers.Dense( + projection_filters, name=f"linear_{feature_dim}" + )(feature) + out = keras.layers.Resizing(H, W, interpolation="bilinear")(out) + multi_layer_outs.append(out) + + # Concat now-equal feature maps + concatenated_outs = keras.layers.Concatenate(axis=3)( + multi_layer_outs[::-1] + ) + + # Fuse concatenated features into a segmentation map + seg = keras.Sequential( + [ + keras.layers.Conv2D( + filters=projection_filters, kernel_size=1, use_bias=False + ), + keras.layers.BatchNormalization(), + keras.layers.Activation("relu"), + ] + )(concatenated_outs) + + seg = keras.layers.Dropout(0.1)(seg) + seg = keras.layers.Conv2D( + filters=num_classes, kernel_size=1, activation="softmax" + )(seg) + + output = keras.layers.Resizing( + height=inputs.shape[1], + width=inputs.shape[2], + interpolation="bilinear", + )(seg) + + super().__init__( + inputs=inputs, + outputs=output, + **kwargs, + ) + + self.num_classes = num_classes + self.projection_filters = projection_filters + self.backbone = backbone + + def get_config(self): + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "projection_filters": self.projection_filters, + "backbone": keras.saving.serialize_keras_object(self.backbone), + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(presets) + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return copy.deepcopy(presets_with_weights) diff --git a/keras_cv/models/segmentation/segformer/segformer_aliases.py b/keras_cv/models/segmentation/segformer/segformer_aliases.py new file mode 100644 index 0000000000..03547f60f2 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_aliases.py @@ -0,0 +1,244 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.models.segmentation.segformer.segformer import SegFormer +from keras_cv.models.segmentation.segformer.segformer_presets import presets +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """SegFormer model. + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + backbone: a KerasCV backbone for feature extraction. + num_classes: the number of classes for segmentation, including the background class. 
+
+    Examples:
+    ```python
+    input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Pretrained backbone
+    backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+    segformer = keras_cv.models.SegFormer(backbone=backbone, num_classes=19)
+    output = segformer(input_data)
+    ```
+"""  # noqa: E501
+
+
+class SegFormerB0(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b0", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b0": copy.deepcopy(presets["segformer_b0"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB1(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b1", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b1": copy.deepcopy(presets["segformer_b1"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB2(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b2", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b2": copy.deepcopy(presets["segformer_b2"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB3(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b3", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b3": copy.deepcopy(presets["segformer_b3"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB4(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b4", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b4": copy.deepcopy(presets["segformer_b4"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB5(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b5", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b5": copy.deepcopy(presets["segformer_b5"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that
include + weights.""" + return cls.presets + + +setattr( + SegFormerB0, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB0"), +) + +setattr( + SegFormerB1, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB1"), +) + +setattr( + SegFormerB2, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB2"), +) + +setattr( + SegFormerB3, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB3"), +) + +setattr( + SegFormerB4, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB4"), +) + +setattr( + SegFormerB5, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB5"), +) diff --git a/keras_cv/models/segmentation/segformer/segformer_presets.py b/keras_cv/models/segmentation/segformer/segformer_presets.py new file mode 100644 index 0000000000..e19e2ec9ba --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_presets.py @@ -0,0 +1,105 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SegFormer model preset configurations.""" + +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) + +presets_no_weights = { + "segformer_b0": { + "metadata": { + "description": ("SegFormer model with MiTB0 backbone."), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "class_name": "keras_cv>SegFormer", + "config": { + "backbone": backbone_presets["mit_b0"], + }, + }, + "segformer_b1": { + "metadata": { + "description": ("SegFormer model with MiTB1 backbone."), + "params": 13682643, + "official_name": "SegFormerB1", + "path": "segformer_b1", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b1"]}, + }, + "segformer_b2": { + "metadata": { + "description": ("SegFormer model with MiTB2 backbone."), + "params": 24727507, + "official_name": "SegFormerB2", + "path": "segformer_b2", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b2"]}, + }, + "segformer_b3": { + "metadata": { + "description": ("SegFormer model with MiTB3 backbone."), + "params": 44603347, + "official_name": "SegFormerB3", + "path": "segformer_b3", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b3"]}, + }, + "segformer_b4": { + "metadata": { + "description": ("SegFormer model with MiTB4 backbone."), + "params": 61373907, + "official_name": "SegFormerB4", + "path": "segformer_b4", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b4"]}, + }, + "segformer_b5": { + "metadata": { + "description": ("SegFormer model with MiTB5 backbone."), + "params": 81974227, + "official_name": "SegFormerB5", + "path": "segformer_b5", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b5"]}, + }, +} + +presets_with_weights = { + "segformer_b0_imagenet": { + "metadata": { + "description": ( + "SegFormer model with a pretrained MiTB0 backbone." 
+ ), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "class_name": "keras_cv>SegFormer", + "config": { + "backbone": backbone_presets["mit_b0_imagenet"], + }, + }, +} + +presets = { + **presets_no_weights, + **presets_with_weights, +} diff --git a/keras_cv/models/segmentation/segformer/segformer_test.py b/keras_cv/models/segmentation/segformer/segformer_test.py new file mode 100644 index 0000000000..0990e0e88f --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_test.py @@ -0,0 +1,92 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import pytest +import tensorflow as tf + +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models import MiTBackbone +from keras_cv.models import SegFormer +from keras_cv.tests.test_case import TestCase + + +class SegFormerTest(TestCase): + def test_segformer_construction(self): + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + @pytest.mark.large + def test_segformer_call(self): + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + images = np.random.uniform(size=(2, 512, 512, 3)) + _ = model(images) + _ = model.predict(images) + + @pytest.mark.large + def test_weights_change(self): + target_size = [512, 512, 2] + + images = tf.ones(shape=[1] + [512, 512, 3]) + labels = tf.zeros(shape=[1] + target_size) + ds = tf.data.Dataset.from_tensor_slices((images, labels)) + ds = ds.repeat(2) + ds = ds.batch(2) + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=2) + + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + original_weights = model.get_weights() + model.fit(ds, epochs=1) + updated_weights = model.get_weights() + + for w1, w2 in zip(original_weights, updated_weights): + self.assertNotAllEqual(w1, w2) + self.assertFalse(ops.any(ops.isnan(w2))) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + target_size = [512, 512, 3] + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + + input_batch = np.ones(shape=[2] + target_size) + model_output = model(input_batch) + + save_path = os.path.join(self.get_temp_dir(), "model.keras") + model.save(save_path, save_format="keras_v3") + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, SegFormer) + + # Check that output matches. 
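+        # (Both outputs are dense per-pixel maps of shape (2, 512, 512, 1)
+        # here; a save/load round trip is expected to reproduce them within
+        # assertAllClose's default tolerances.)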
+ restored_output = restored_model(input_batch) + self.assertAllClose(model_output, restored_output) From acd2681cae2669065207de2623527729a0db0b58 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Fri, 25 Aug 2023 18:33:08 -0600 Subject: [PATCH 17/17] Fix test errors that were introduced by upgrading to Keras Core 0.1.5 (#2041) * Fix build process for spatial pyramid pooling * Fix label encoder for YOLOV8 for 0.1.5 --- keras_cv/layers/spatial_pyramid.py | 10 +++++++++- .../object_detection/yolo_v8/yolo_v8_label_encoder.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/keras_cv/layers/spatial_pyramid.py b/keras_cv/layers/spatial_pyramid.py index 9c9a6849df..b45ee7bda3 100644 --- a/keras_cv/layers/spatial_pyramid.py +++ b/keras_cv/layers/spatial_pyramid.py @@ -91,6 +91,7 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + conv_sequential.build(input_shape) self.aspp_parallel_channels.append(conv_sequential) # Channel 2 and afterwards are based on self.dilation_rates, and each of @@ -109,6 +110,7 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + conv_sequential.build(input_shape) self.aspp_parallel_channels.append(conv_sequential) # Last channel is the global average pooling with conv2D 1x1 kernel. @@ -125,10 +127,11 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + pool_sequential.build(input_shape) self.aspp_parallel_channels.append(pool_sequential) # Final projection layers - self.projection = keras.Sequential( + projection = keras.Sequential( [ keras.layers.Conv2D( filters=self.num_channels, @@ -140,6 +143,11 @@ def build(self, input_shape): keras.layers.Dropout(rate=self.dropout), ], ) + projection_input_channels = ( + 2 + len(self.dilation_rates) + ) * self.num_channels + projection.build(tuple(input_shape[:-1]) + (projection_input_channels,)) + self.projection = projection def call(self, inputs, training=None): """Calls the Atrous Spatial Pyramid Pooling layer on an input. diff --git a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py index 48e09740f2..9595cd2ee3 100644 --- a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py +++ b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py @@ -225,7 +225,7 @@ def encode_to_targets( # return zeros if no gt boxes are present return ops.cond( - max_num_boxes > 0, + ops.array(max_num_boxes > 0), lambda: encode_to_targets( pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt ),
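
For context on the SpatialPyramidPooling fix above: the projection now has to be built explicitly, and its input width is the concatenation of every parallel branch. Below is a minimal sketch of that shape bookkeeping, using assumed values for `num_channels` and `dilation_rates` (illustrative only, not the layer's defaults):

```python
# Sketch only: `num_channels` and `dilation_rates` are assumed example values.
num_channels = 256
dilation_rates = [6, 12, 18]

# Branches concatenated before the projection: one 1x1-conv branch,
# one dilated-conv branch per rate, and one global-pooling branch,
# each producing `num_channels` feature maps.
branch_count = 1 + len(dilation_rates) + 1
projection_input_channels = branch_count * num_channels

# Same expression as in SpatialPyramidPooling.build above.
assert projection_input_channels == (2 + len(dilation_rates)) * num_channels
print(projection_input_channels)  # 1280 for these example values
```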