From 2a704e1a69cc46c808209f45ce19f311b05fe19d Mon Sep 17 00:00:00 2001
From: IMvision12 <88665786+IMvision12@users.noreply.github.com>
Date: Tue, 15 Aug 2023 04:55:44 +0530
Subject: [PATCH 01/17] [RandomTranslation] Supporting Segmentation Masks
(#2024)
* Added support for segmentation
* Format
* Update README
* Demo
* interpolation
* Format
* Format
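A minimal usage sketch (not part of the patch; assumes the standard
KerasCV dict-input convention for paired images and masks):

```python
import numpy as np

from keras_cv.layers import RandomTranslation

layer = RandomTranslation(height_factor=0.5, width_factor=0.5)
inputs = {
    "images": np.random.uniform(size=(4, 64, 64, 3)).astype("float32"),
    "segmentation_masks": np.zeros((4, 64, 64, 1), dtype="float32"),
}
# Masks are shifted by the same sampled offsets as the images, resampled
# with nearest-neighbor interpolation so class ids are preserved.
outputs = layer(inputs)
```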
---
.../segmentation/random_translation_demo.py | 33 +++++++++++++++++++
keras_cv/layers/preprocessing/README.md | 2 +-
.../preprocessing/random_translation.py | 28 ++++++++++++++++
3 files changed, 62 insertions(+), 1 deletion(-)
create mode 100644 examples/layers/preprocessing/segmentation/random_translation_demo.py
diff --git a/examples/layers/preprocessing/segmentation/random_translation_demo.py b/examples/layers/preprocessing/segmentation/random_translation_demo.py
new file mode 100644
index 0000000000..72abb9bcbb
--- /dev/null
+++ b/examples/layers/preprocessing/segmentation/random_translation_demo.py
@@ -0,0 +1,33 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""random_translation_demo.py shows how to use the RandomTranslation
+preprocessing layer. Uses the oxford iiit pet_dataset. In this
+script the pets are loaded, then are passed through the
+preprocessing layers. Finally, they are shown using matplotlib.
+"""
+import demo_utils
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+def main():
+ ds = demo_utils.load_oxford_iiit_pet_dataset()
+ random_translation = preprocessing.RandomTranslation(0.5, 0.5)
+ ds = ds.map(random_translation, num_parallel_calls=tf.data.AUTOTUNE)
+ demo_utils.visualize_dataset(ds)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md
index fcfacd8cd3..b3fef9e96c 100644
--- a/keras_cv/layers/preprocessing/README.md
+++ b/keras_cv/layers/preprocessing/README.md
@@ -38,7 +38,7 @@ The provided table gives an overview of the different augmentation layers availa
| RandomSaturation | ✅ | ✅ | ✅ | ✅ |
| RandomSharpness | ✅ | ✅ | ✅ | ✅ |
| RandomShear | ✅ | ❌ | ✅ | ✅ |
-| RandomTranslation | ✅ | ❌ | ✅ | ✅ |
+| RandomTranslation | ✅ | ✅ | ✅ | ✅ |
| RandomZoom | ✅ | ❌ | ❌ | ✅ |
| RepeatedAugmentation + | - | - | - | - |
| Rescaling | ❌ | ✅ | ✅ | ✅ |
diff --git a/keras_cv/layers/preprocessing/random_translation.py b/keras_cv/layers/preprocessing/random_translation.py
index 8b3a99a684..3fcb6d7daf 100644
--- a/keras_cv/layers/preprocessing/random_translation.py
+++ b/keras_cv/layers/preprocessing/random_translation.py
@@ -201,6 +201,34 @@ def augment_images(self, images, transformations, **kwargs):
def augment_labels(self, labels, transformations, **kwargs):
return labels
+ def augment_segmentation_masks(
+ self, segmentation_masks, transformations, **kwargs
+ ):
+ segmentation_masks = preprocessing_utils.ensure_tensor(
+ segmentation_masks, self.compute_dtype
+ )
+ original_shape = segmentation_masks.shape
+ mask_shape = tf.shape(segmentation_masks)
+ img_hd = tf.cast(mask_shape[H_AXIS], tf.float32)
+ img_wd = tf.cast(mask_shape[W_AXIS], tf.float32)
+ height_translations = transformations["height_translations"]
+ width_translations = transformations["width_translations"]
+ height_translations = height_translations * img_hd
+ width_translations = width_translations * img_wd
+ translations = tf.cast(
+ tf.concat([width_translations, height_translations], axis=1),
+ dtype=tf.float32,
+ )
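+ # Apply the same translation sampled for the images, but force
+ # nearest-neighbor interpolation so mask values stay valid class ids.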
+ output = preprocessing_utils.transform(
+ segmentation_masks,
+ preprocessing_utils.get_translation_matrix(translations),
+ interpolation="nearest",
+ fill_mode=self.fill_mode,
+ fill_value=self.fill_value,
+ )
+ output.set_shape(original_shape)
+ return output
+
def augment_bounding_boxes(
self, bounding_boxes, transformations, images=None, **kwargs
):
From d611c83e73e86a829893eb1d64eb207ecfb4a37b Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Tue, 15 Aug 2023 11:48:23 -0600
Subject: [PATCH 02/17] Restructure retinanet steps to avoid modifying args
(#2029)
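Label encoding now happens inside `compute_loss`: the raw `y` is
converted to the label encoder's bounding box format and encoded there,
so `train_step` and `test_step` just peel the data element off `args`
and forward the unmodified `(x, y)` to the superclass. As a side effect,
`compute_metrics` receives the raw boxes directly and no longer needs
the "unencoded" key.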
---
.../object_detection/retinanet/retinanet.py | 59 +++++--------------
1 file changed, 14 insertions(+), 45 deletions(-)
diff --git a/keras_cv/models/object_detection/retinanet/retinanet.py b/keras_cv/models/object_detection/retinanet/retinanet.py
index 4c9fc5909d..3dd42c185c 100644
--- a/keras_cv/models/object_detection/retinanet/retinanet.py
+++ b/keras_cv/models/object_detection/retinanet/retinanet.py
@@ -396,10 +396,18 @@ def compile(
super().compile(loss=losses, **kwargs)
def compute_loss(self, x, y, y_pred, sample_weight, **kwargs):
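+ # Encode ground-truth boxes here, rather than in train_step/test_step,
+ # so the incoming args are never modified before reaching super().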
+ y_for_label_encoder = bounding_box.convert_format(
+ y,
+ source=self.bounding_box_format,
+ target=self.label_encoder.bounding_box_format,
+ images=x,
+ )
+
+ boxes, classes = self.label_encoder(x, y_for_label_encoder)
+
box_pred = y_pred["box"]
cls_pred = y_pred["classification"]
- boxes = y["box"]
- classes = y["classification"]
+
if boxes.shape[-1] != 4:
raise ValueError(
"boxes should have shape (None, None, 4). Got "
@@ -453,50 +461,15 @@ def compute_loss(self, x, y, y_pred, sample_weight, **kwargs):
def train_step(self, *args):
data = args[-1]
+ args = args[:-1]
x, y = unpack_input(data)
-
- y_for_label_encoder = bounding_box.convert_format(
- y,
- source=self.bounding_box_format,
- target=self.label_encoder.bounding_box_format,
- images=x,
- )
-
- boxes, classes = self.label_encoder(x, y_for_label_encoder)
- super_args = args[:-1] + (
- (
- x,
- {"box": boxes, "classification": classes, "unencoded": y},
- ),
- )
-
- return super().train_step(*super_args)
+ return super().train_step(*args, (x, y))
def test_step(self, *args):
data = args[-1]
+ args = args[:-1]
x, y = unpack_input(data)
- y_for_label_encoder = bounding_box.convert_format(
- y,
- source=self.bounding_box_format,
- target=self.label_encoder.bounding_box_format,
- images=x,
- )
- boxes, classes = self.label_encoder(x, y_for_label_encoder)
- boxes = bounding_box.convert_format(
- boxes,
- source=self.label_encoder.bounding_box_format,
- target=self.bounding_box_format,
- images=x,
- )
-
- super_args = args[:-1] + (
- (
- x,
- {"box": boxes, "classification": classes, "unencoded": y},
- ),
- )
-
- return super().test_step(*super_args)
+ return super().test_step(*args, (x, y))
def compute_metrics(self, x, y, y_pred, sample_weight):
metrics = {}
@@ -505,10 +478,6 @@ def compute_metrics(self, x, y, y_pred, sample_weight):
if not self._has_user_metrics:
return metrics
- # For computing non-loss metrics, we don't care about the encoded
- # boxes and classes, just the raw input boxes.
- y = y["unencoded"]
-
y_pred = self.decode_predictions(y_pred, x)
for metric in self._user_metrics:
From db57e1d0dab58e97c7cedfe1db2fe6a3dd7750db Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Wed, 16 Aug 2023 10:17:19 -0600
Subject: [PATCH 03/17] Use non-ragged outputs in MultiClassNMS (#2030)
* Use non-ragged outputs in MultiClassNMS
Using Ragged outputs that weren't subsequently padded was causing issues in the PyCOCOCallback, and we shouldn't silently default to Ragged anywhere.
* Update multi_class_non_max_suppression_test.py
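With this change the layer returns dense, padded tensors instead of
ragged ones. A minimal sketch of the new output contract (illustrative;
shapes assume the layer's default `max_detections=100`):

```python
import numpy as np

from keras_cv import layers

box_pred = np.random.uniform(size=(8, 5, 4)).astype("float32")
cls_pred = np.random.uniform(size=(8, 5, 2)).astype("float32")

decoder = layers.MultiClassNonMaxSuppression(
    bounding_box_format="xyxy",
    from_logits=True,
)
outputs = decoder(box_pred, cls_pred)
# outputs["boxes"].shape      -> (8, 100, 4)
# outputs["classes"].shape    -> (8, 100)
# outputs["confidence"].shape -> (8, 100)
# Invalid slots are masked in place rather than returned as ragged rows.
```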
---
.../object_detection/multi_class_non_max_suppression.py | 2 +-
.../multi_class_non_max_suppression_test.py | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py
index 3d34eafbf2..7825268578 100644
--- a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py
+++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py
@@ -123,7 +123,7 @@ def call(
}
# this is required to comply with KerasCV bounding box format.
return bounding_box.mask_invalid_detections(
- bounding_boxes, output_ragged=True
+ bounding_boxes, output_ragged=False
)
def get_config(self):
diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py
index f019e182d7..19eef623ff 100644
--- a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py
+++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py
@@ -47,6 +47,6 @@ def decode_predictions_output_shapes():
class NmsPredictionDecoderTest(TestCase):
def test_decode_predictions_output_shapes(self):
result = decode_predictions_output_shapes()
- self.assertEqual(result["boxes"].shape, [8, None, 4])
- self.assertEqual(result["classes"].shape, [8, None])
- self.assertEqual(result["confidence"].shape, [8, None])
+ self.assertEqual(result["boxes"].shape, [8, 100, 4])
+ self.assertEqual(result["classes"].shape, [8, 100])
+ self.assertEqual(result["confidence"].shape, [8, 100])
From 39278475b0751ce94d0f4c1c2673040b66ba17a1 Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Wed, 16 Aug 2023 10:42:11 -0600
Subject: [PATCH 04/17] Use correct convention for static shape in point_cloud
grouping (#2028)
* Use correct convention for static shape in point_cloud grouping
* Use len(shape) for rank
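The convention in question, in isolation (a minimal sketch, not part of
the patch):

```python
import tensorflow as tf

def num_points_static_or_dynamic(points):
    # Prefer the statically known dimension when it is available at
    # trace time; fall back to the dynamic shape tensor otherwise.
    return points.shape[-2] or tf.shape(points)[-2]
```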
---
keras_cv/point_cloud/point_cloud.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/keras_cv/point_cloud/point_cloud.py b/keras_cv/point_cloud/point_cloud.py
index 5e27e8b62c..ad345f915d 100644
--- a/keras_cv/point_cloud/point_cloud.py
+++ b/keras_cv/point_cloud/point_cloud.py
@@ -72,10 +72,10 @@ def group_points_by_boxes(points, boxes):
box, all the point indices that belong to the box.
"""
- num_boxes = boxes.get_shape().as_list()[-2] or tf.shape(boxes)[-2]
+ num_boxes = boxes.shape[-2] or tf.shape(boxes)[-2]
# [..., num_points]
box_indices = within_box3d_index(points, boxes)
- num_points = points.get_shape().as_list()[-2] or tf.shape(points)[-2]
+ num_points = points.shape[-2] or tf.shape(points)[-2]
point_indices = tf.range(num_points, dtype=tf.int32)
def group_per_sample(box_index):
@@ -87,7 +87,7 @@ def group_per_sample(box_index):
)
return res
- boxes_rank = boxes.shape.rank
+ boxes_rank = len(boxes.shape)
if boxes_rank == 2:
return group_per_sample(box_indices)
elif boxes_rank == 3:
From 1eabccb30908a1a10143c0c46204443d7c7825d5 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Thu, 17 Aug 2023 07:46:23 +0530
Subject: [PATCH 05/17] Migrate Efficientnetlite to Backbone (#1877)
* initial commit with moved files+added files
* removed unwanted
* added presets
* backbone file changes
* aliases file added
* backbone fix
* inits
* format and lint
* format and lint
* add test+format and lint
* update params
* reviewed comments
* fix
* fix
* updated test backbone
* review changes from #1716
* fix
* port
* port:typo+fix
* port: fix test error
* port: update
* fix backbone
* fix preset in aliases
* nits
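Usage of the migrated backbone, following the docstrings added below (a
minimal sketch):

```python
import numpy as np

from keras_cv.models import EfficientNetLiteB0Backbone

# Randomly initialized EfficientNetLiteB0 with the preset architecture.
model = EfficientNetLiteB0Backbone()
output = model(np.ones(shape=(8, 224, 224, 3)))
```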
---
keras_cv/models/__init__.py | 18 +
keras_cv/models/backbones/backbone_presets.py | 5 +
.../backbones/efficientnet_lite/__init__.py | 13 +
.../efficientnet_lite_aliases.py | 228 ++++++
.../efficientnet_lite_backbone.py | 366 ++++++++++
.../efficientnet_lite_backbone_presets.py | 175 +++++
...efficientnet_lite_backbone_presets_test.py | 60 ++
.../efficientnet_lite_backbone_test.py | 162 +++++
keras_cv/models/legacy/__init__.py | 5 -
keras_cv/models/legacy/efficientnet_lite.py | 678 ------------------
.../models/legacy/efficientnet_lite_test.py | 55 --
11 files changed, 1027 insertions(+), 738 deletions(-)
create mode 100644 keras_cv/models/backbones/efficientnet_lite/__init__.py
create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py
create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py
create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py
create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py
delete mode 100644 keras_cv/models/legacy/efficientnet_lite.py
delete mode 100644 keras_cv/models/legacy/efficientnet_lite_test.py
diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py
index 3e5847a346..1861b49c03 100644
--- a/keras_cv/models/__init__.py
+++ b/keras_cv/models/__init__.py
@@ -43,6 +43,24 @@
from keras_cv.models.backbones.densenet.densenet_backbone import (
DenseNetBackbone,
)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB1Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB2Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB3Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB4Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501
+ EfficientNetLiteBackbone,
+)
from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import (
EfficientNetV2B0Backbone,
)
diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py
index 1535d91374..3852b28877 100644
--- a/keras_cv/models/backbones/backbone_presets.py
+++ b/keras_cv/models/backbones/backbone_presets.py
@@ -16,6 +16,9 @@
from keras_cv.models.backbones.csp_darknet import csp_darknet_backbone_presets
from keras_cv.models.backbones.densenet import densenet_backbone_presets
+from keras_cv.models.backbones.efficientnet_lite import (
+ efficientnet_lite_backbone_presets,
+)
from keras_cv.models.backbones.efficientnet_v2 import (
efficientnet_v2_backbone_presets,
)
@@ -31,6 +34,7 @@
**csp_darknet_backbone_presets.backbone_presets_no_weights,
**efficientnet_v2_backbone_presets.backbone_presets_no_weights,
**densenet_backbone_presets.backbone_presets_no_weights,
+ **efficientnet_lite_backbone_presets.backbone_presets_no_weights,
**yolo_v8_backbone_presets.backbone_presets_no_weights,
}
@@ -41,6 +45,7 @@
**csp_darknet_backbone_presets.backbone_presets_with_weights,
**efficientnet_v2_backbone_presets.backbone_presets_with_weights,
**densenet_backbone_presets.backbone_presets_with_weights,
+ **efficientnet_lite_backbone_presets.backbone_presets_with_weights,
**yolo_v8_backbone_presets.backbone_presets_with_weights,
}
diff --git a/keras_cv/models/backbones/efficientnet_lite/__init__.py b/keras_cv/models/backbones/efficientnet_lite/__init__.py
new file mode 100644
index 0000000000..3992ffb59a
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py
new file mode 100644
index 0000000000..1a8fe92404
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py
@@ -0,0 +1,228 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501
+ EfficientNetLiteBackbone,
+)
+from keras_cv.utils.python_utils import classproperty
+
+ALIAS_DOCSTRING = """Instantiates the {name} architecture.
+
+ Reference:
+ - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+ (ICML 2019)
+
+ Args:
+ include_rescaling: bool, whether to rescale the inputs. If set
+ to `True`, inputs will be passed through a `Rescaling(1/255.0)`
+ layer.
+ input_shape: optional shape tuple, defaults to (None, None, 3).
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+ to use as image input for the model.
+ Usage:
+ ```python
+ input_data = np.ones(shape=(8, 224, 224, 3))
+
+ # Randomly initialized backbone
+ model = {name}Backbone()
+ output = model(input_data)
+ ```
+""" # noqa: E501
+
+
+class EfficientNetLiteB0Backbone(EfficientNetLiteBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b0", **kwargs
+ )
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetLiteB1Backbone(EfficientNetLiteBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b1", **kwargs
+ )
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetLiteB2Backbone(EfficientNetLiteBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b2", **kwargs
+ )
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetLiteB3Backbone(EfficientNetLiteBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b3", **kwargs
+ )
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetLiteB4Backbone(EfficientNetLiteBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b4", **kwargs
+ )
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+setattr(
+ EfficientNetLiteB0Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetLiteB0"),
+)
+setattr(
+ EfficientNetLiteB1Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetLiteB1"),
+)
+setattr(
+ EfficientNetLiteB2Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetLiteB2"),
+)
+setattr(
+ EfficientNetLiteB3Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetLiteB3"),
+)
+setattr(
+ EfficientNetLiteB4Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetLiteB4"),
+)
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
new file mode 100644
index 0000000000..d3a6fd8815
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
@@ -0,0 +1,366 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""EfficientNet Lite backbone model.
+
+Reference:
+ - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+ (ICML 2019)
+ - [Based on the original EfficientNet Lite implementation](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
+""" # noqa: E501
+
+import copy
+import math
+
+from keras_cv.backend import keras
+from keras_cv.models import utils
+from keras_cv.models.backbones.backbone import Backbone
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone_presets import ( # noqa: E501
+ backbone_presets,
+)
+from keras_cv.utils.python_utils import classproperty
+
+BN_AXIS = 3
+
+
+@keras.saving.register_keras_serializable(package="keras_cv.models")
+class EfficientNetLiteBackbone(Backbone):
+ """Instantiates the EfficientNetLite architecture using given scaling
+ coefficients.
+
+ Reference:
+ - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+ (ICML 2019)
+ - [Based on the original EfficientNet Lite implementation](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
+
+ Args:
+ include_rescaling: whether to rescale the inputs. If set to True,
+ inputs will be passed through a `Rescaling(1/255.0)` layer.
+ width_coefficient: float, scaling coefficient for network width.
+ depth_coefficient: float, scaling coefficient for network depth.
+ dropout_rate: float, dropout rate before final classifier layer.
+ drop_connect_rate: float, dropout rate at skip connections. The
+ default value is set to 0.2.
+ depth_divisor: integer, a unit of network width. The default value
+ is set to 8.
+ activation: activation function.
+ input_shape: optional shape tuple. It should have exactly
+ 3 input channels.
+ input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`)
+ to use as image input for the model.
+
+ Usage:
+ ```python
+ # Construct an EfficientNetLite from a preset:
+ efficientnet = models.EfficientNetLiteBackbone.from_preset(
+ "efficientnetlite_b0"
+ )
+ images = np.ones((1, 256, 256, 3))
+ outputs = efficientnet.predict(images)
+
+ # Alternatively, you can also customize the EfficientNetLite architecture:
+ model = EfficientNetLiteBackbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=False,
+ )
+ images = np.ones((1, 256, 256, 3))
+ outputs = model.predict(images)
+ ```
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ *,
+ include_rescaling,
+ width_coefficient,
+ depth_coefficient,
+ stackwise_kernel_sizes,
+ stackwise_num_repeats,
+ stackwise_input_filters,
+ stackwise_output_filters,
+ stackwise_expansion_ratios,
+ stackwise_strides,
+ dropout_rate=0.2,
+ drop_connect_rate=0.2,
+ depth_divisor=8,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ activation="relu6",
+ **kwargs,
+ ):
+ img_input = utils.parse_model_inputs(input_shape, input_tensor)
+
+ # Build stem
+ x = img_input
+
+ if include_rescaling:
+ # Use common rescaling strategy across keras_cv
+ x = keras.layers.Rescaling(1.0 / 255.0)(x)
+
+ x = keras.layers.ZeroPadding2D(
+ padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad"
+ )(x)
+ x = keras.layers.Conv2D(
+ 32,
+ 3,
+ strides=2,
+ padding="valid",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name="stem_conv",
+ )(x)
+ x = keras.layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x)
+ x = keras.layers.Activation(activation, name="stem_activation")(x)
+
+ # Build blocks
+ block_id = 0
+ blocks = float(sum(stackwise_num_repeats))
+
+ pyramid_level_inputs = []
+
+ for i in range(len(stackwise_kernel_sizes)):
+ num_repeats = stackwise_num_repeats[i]
+ input_filters = stackwise_input_filters[i]
+ output_filters = stackwise_output_filters[i]
+ # Update block input and output filters based on depth multiplier.
+ input_filters = round_filters(
+ filters=input_filters,
+ width_coefficient=width_coefficient,
+ depth_divisor=depth_divisor,
+ )
+ output_filters = round_filters(
+ filters=output_filters,
+ width_coefficient=width_coefficient,
+ depth_divisor=depth_divisor,
+ )
+
+ if i == 0 or i == (len(stackwise_kernel_sizes) - 1):
+ repeats = num_repeats
+ else:
+ repeats = round_repeats(
+ repeats=num_repeats,
+ depth_coefficient=depth_coefficient,
+ )
+ strides = stackwise_strides[i]
+
+ for j in range(repeats):
+ # The first block needs to take care of stride and filter size
+ # increase.
+ if j > 0:
+ strides = 1
+ input_filters = output_filters
+
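+ # A stride > 1 block begins a new spatial resolution; record the
+ # incoming tensor's name as a feature pyramid tap point.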
+ if strides != 1:
+ pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+ # 97 is the start of the lowercase alphabet.
+ letter_identifier = chr(j + 97)
+ x = apply_efficient_net_lite_block(
+ inputs=x,
+ filters_in=input_filters,
+ filters_out=output_filters,
+ kernel_size=stackwise_kernel_sizes[i],
+ strides=strides,
+ expand_ratio=stackwise_expansion_ratios[i],
+ activation=activation,
+ dropout_rate=drop_connect_rate * block_id / blocks,
+ name="block{}{}_".format(i + 1, letter_identifier),
+ )
+ block_id += 1
+
+ # Build top
+ x = keras.layers.Conv2D(
+ 1280,
+ 1,
+ padding="same",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name="top_conv",
+ )(x)
+ x = keras.layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x)
+ x = keras.layers.Activation(activation, name="top_activation")(x)
+
+ pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+ # Create model.
+ super().__init__(inputs=img_input, outputs=x, **kwargs)
+
+ self.include_rescaling = include_rescaling
+ self.width_coefficient = width_coefficient
+ self.depth_coefficient = depth_coefficient
+ self.dropout_rate = dropout_rate
+ self.drop_connect_rate = drop_connect_rate
+ self.depth_divisor = depth_divisor
+ self.activation = activation
+ self.input_tensor = input_tensor
+ self.pyramid_level_inputs = {
+ f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs)
+ }
+ self.stackwise_kernel_sizes = stackwise_kernel_sizes
+ self.stackwise_num_repeats = stackwise_num_repeats
+ self.stackwise_input_filters = stackwise_input_filters
+ self.stackwise_output_filters = stackwise_output_filters
+ self.stackwise_expansion_ratios = stackwise_expansion_ratios
+ self.stackwise_strides = stackwise_strides
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "include_rescaling": self.include_rescaling,
+ "width_coefficient": self.width_coefficient,
+ "depth_coefficient": self.depth_coefficient,
+ "dropout_rate": self.dropout_rate,
+ "drop_connect_rate": self.drop_connect_rate,
+ "depth_divisor": self.depth_divisor,
+ "activation": self.activation,
+ "input_tensor": self.input_tensor,
+ "input_shape": self.input_shape[1:],
+ "stackwise_kernel_sizes": self.stackwise_kernel_sizes,
+ "stackwise_num_repeats": self.stackwise_num_repeats,
+ "stackwise_input_filters": self.stackwise_input_filters,
+ "stackwise_output_filters": self.stackwise_output_filters,
+ "stackwise_expansion_ratios": self.stackwise_expansion_ratios,
+ "stackwise_strides": self.stackwise_strides,
+ }
+ )
+ return config
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return copy.deepcopy(backbone_presets)
+
+
+def conv_kernel_initializer(scale=2.0):
+ return keras.initializers.VarianceScaling(
+ scale=scale, mode="fan_out", distribution="truncated_normal"
+ )
+
+
+def round_filters(filters, depth_divisor, width_coefficient):
+ """Round number of filters based on depth multiplier."""
+ filters *= width_coefficient
+ new_filters = max(
+ depth_divisor,
+ int(filters + depth_divisor / 2) // depth_divisor * depth_divisor,
+ )
+ # Make sure that round down does not go down by more than 10%.
+ if new_filters < 0.9 * filters:
+ new_filters += depth_divisor
+ return int(new_filters)
+
+
+def round_repeats(repeats, depth_coefficient):
+ """Round number of repeats based on depth multiplier."""
+ return int(math.ceil(depth_coefficient * repeats))
+
+
+def apply_efficient_net_lite_block(
+ inputs,
+ activation="relu6",
+ dropout_rate=0.0,
+ name=None,
+ filters_in=32,
+ filters_out=16,
+ kernel_size=3,
+ strides=1,
+ expand_ratio=1,
+):
+ """An inverted residual block, without SE phase.
+
+ Args:
+ inputs: input tensor.
+ activation: activation function.
+ dropout_rate: float between 0 and 1, fraction of the input units to drop.
+ name: string, block label.
+ filters_in: integer, the number of input filters.
+ filters_out: integer, the number of output filters.
+ kernel_size: integer, the dimension of the convolution window.
+ strides: integer, the stride of the convolution.
+ expand_ratio: integer, scaling coefficient for the input filters.
+
+ Returns:
+ output tensor for the block.
+ """ # noqa: E501
+ if name is None:
+ name = f"block_{keras.backend.get_uid('block_')}_"
+
+ # Expansion phase
+ filters = filters_in * expand_ratio
+ if expand_ratio != 1:
+ x = keras.layers.Conv2D(
+ filters,
+ 1,
+ padding="same",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "expand_conv",
+ )(inputs)
+ x = keras.layers.BatchNormalization(
+ axis=BN_AXIS, name=name + "expand_bn"
+ )(x)
+ x = keras.layers.Activation(
+ activation, name=name + "expand_activation"
+ )(x)
+ else:
+ x = inputs
+
+ # Depthwise Convolution
+ if strides == 2:
+ x = keras.layers.ZeroPadding2D(
+ padding=utils.correct_pad_downsample(x, kernel_size),
+ name=name + "dwconv_pad",
+ )(x)
+ conv_pad = "valid"
+ else:
+ conv_pad = "same"
+ x = keras.layers.DepthwiseConv2D(
+ kernel_size,
+ strides=strides,
+ padding=conv_pad,
+ use_bias=False,
+ depthwise_initializer=conv_kernel_initializer(),
+ name=name + "dwconv",
+ )(x)
+ x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x)
+ x = keras.layers.Activation(activation, name=name + "activation")(x)
+
+ # Output phase
+ x = keras.layers.Conv2D(
+ filters_out,
+ 1,
+ padding="same",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "project_conv",
+ )(x)
+ x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")(
+ x
+ )
+ if strides == 1 and filters_in == filters_out:
+ if dropout_rate > 0:
+ x = keras.layers.Dropout(
+ dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop"
+ )(x)
+ x = keras.layers.add([x, inputs], name=name + "add")
+ return x
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py
new file mode 100644
index 0000000000..db9838e3de
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py
@@ -0,0 +1,175 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""EfficientNetLite model preset configurations."""
+
+backbone_presets_no_weights = {
+ "efficientnetlite_b0": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.0` and `depth_coefficient=1.0`."
+ ),
+ "params": 3414176,
+ "official_name": "EfficientNetLite",
+ "path": "EfficientNetLite",
+ },
+ "class_name": "keras_cv.models>EfficientNetLiteBackbone",
+ "config": {
+ "width_coefficient": 1.0,
+ "depth_coefficient": 1.0,
+ "dropout_rate": 0.2,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "relu6",
+ },
+ },
+ "efficientnetlite_b1": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.0` and `depth_coefficient=1.1`."
+ ),
+ "params": 4190496,
+ "official_name": "EfficientNetLite",
+ "path": "EfficientNetLite",
+ },
+ "class_name": "keras_cv.models>EfficientNetLiteBackbone",
+ "config": {
+ "width_coefficient": 1.0,
+ "depth_coefficient": 1.1,
+ "dropout_rate": 0.2,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "relu6",
+ },
+ },
+ "efficientnetlite_b2": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.1` and `depth_coefficient=1.2`."
+ ),
+ "params": 4870320,
+ "official_name": "EfficientNetLite",
+ "path": "EfficientNetLite",
+ },
+ "class_name": "keras_cv.models>EfficientNetLiteBackbone",
+ "config": {
+ "width_coefficient": 1.1,
+ "depth_coefficient": 1.2,
+ "dropout_rate": 0.3,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "relu6",
+ },
+ },
+ "efficientnetlite_b3": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.2` and `depth_coefficient=1.4`."
+ ),
+ "params": 6994504,
+ "official_name": "EfficientNetLite",
+ "path": "EfficientNetLite",
+ },
+ "class_name": "keras_cv.models>EfficientNetLiteBackbone",
+ "config": {
+ "width_coefficient": 1.2,
+ "depth_coefficient": 1.4,
+ "dropout_rate": 0.3,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "relu6",
+ },
+ },
+ "efficientnetlite_b4": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.4` and `depth_coefficient=1.8`."
+ ),
+ "params": 11840256,
+ "official_name": "EfficientNetLite",
+ "path": "EfficientNetLite",
+ },
+ "class_name": "keras_cv.models>EfficientNetLiteBackbone",
+ "config": {
+ "width_coefficient": 1.4,
+ "depth_coefficient": 1.8,
+ "dropout_rate": 0.3,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "relu6",
+ },
+ },
+}
+
+backbone_presets_with_weights = {}
+
+backbone_presets = {
+ **backbone_presets_no_weights,
+ **backbone_presets_with_weights,
+}
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py
new file mode 100644
index 0000000000..d4e783141e
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py
@@ -0,0 +1,60 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501
+ EfficientNetLiteBackbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+@pytest.mark.extra_large
+class EfficientNetLitePresetFullTest(TestCase):
+ """
+ Test the full enumeration of our presets.
+ This tests every preset for EfficientNetLite and is only run manually.
+ Run with:
+ `pytest keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py --run_extra_large`
+ """ # noqa: E501
+
+ @parameterized.named_parameters(
+ *[(preset, preset) for preset in EfficientNetLiteBackbone.presets]
+ )
+ def test_load_efficientnetlite(self, preset):
+ input_data = np.ones(shape=(2, 224, 224, 3))
+ model = EfficientNetLiteBackbone.from_preset(preset)
+ model(input_data)
+
+ def test_efficientnetlite_feature_extractor(self):
+ model = EfficientNetLiteB0Backbone(
+ include_rescaling=False,
+ input_shape=[256, 256, 3],
+ )
+ levels = ["P3", "P4"]
+ layer_names = [model.pyramid_level_inputs[level] for level in levels]
+ backbone_model = get_feature_extractor(model, layer_names, levels)
+ inputs = keras.Input(shape=[256, 256, 3])
+ outputs = backbone_model(inputs)
+ self.assertLen(outputs, 2)
+ self.assertEqual(list(outputs.keys()), levels)
+ self.assertEqual(outputs["P3"].shape[:3], (None, 32, 32))
+ self.assertEqual(outputs["P4"].shape[:3], (None, 16, 16))
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py
new file mode 100644
index 0000000000..195e6ea0cf
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py
@@ -0,0 +1,162 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501
+ EfficientNetLiteB0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501
+ EfficientNetLiteBackbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+class EfficientNetLiteBackboneTest(TestCase):
+ def setUp(self):
+ self.input_batch = np.ones(shape=(8, 224, 224, 3))
+
+ def test_valid_call(self):
+ model = EfficientNetLiteBackbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=False,
+ )
+ model(self.input_batch)
+
+ def test_valid_call_alias_model_with_rescaling(self):
+ model = EfficientNetLiteB0Backbone(include_rescaling=True)
+ model(self.input_batch)
+
+ def test_valid_call_with_rescaling(self):
+ model = EfficientNetLiteBackbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=True,
+ )
+ model(self.input_batch)
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_model(self):
+ model = EfficientNetLiteBackbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=True,
+ )
+ model_output = model(self.input_batch)
+ save_path = os.path.join(
+ self.get_temp_dir(), "efficientnet_lite_backbone.keras"
+ )
+ model.save(save_path)
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+ self.assertIsInstance(restored_model, EfficientNetLiteBackbone)
+
+ # Check that output matches.
+ restored_output = restored_model(self.input_batch)
+ self.assertAllClose(model_output, restored_output)
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_alias_model(self):
+ model = EfficientNetLiteB0Backbone()
+ model_output = model(self.input_batch)
+ save_path = os.path.join(
+ self.get_temp_dir(), "efficientnet_lite_backbone.keras"
+ )
+ model.save(save_path)
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+ # Note that these aliases serialized as the base class
+ self.assertIsInstance(restored_model, EfficientNetLiteBackbone)
+
+ # Check that output matches.
+ restored_output = restored_model(self.input_batch)
+ self.assertAllClose(model_output, restored_output)
+
+ def test_feature_pyramid_inputs(self):
+ model = EfficientNetLiteB0Backbone()
+ backbone_model = get_feature_extractor(
+ model,
+ model.pyramid_level_inputs.values(),
+ model.pyramid_level_inputs.keys(),
+ )
+ input_size = 256
+ inputs = keras.Input(shape=[input_size, input_size, 3])
+ outputs = backbone_model(inputs)
+ levels = ["P1", "P2", "P3", "P4", "P5"]
+ self.assertEqual(list(outputs.keys()), levels)
+ self.assertEqual(
+ outputs["P1"].shape,
+ (None, input_size // 2**1, input_size // 2**1, 16),
+ )
+ self.assertEqual(
+ outputs["P2"].shape,
+ (None, input_size // 2**2, input_size // 2**2, 24),
+ )
+ self.assertEqual(
+ outputs["P3"].shape,
+ (None, input_size // 2**3, input_size // 2**3, 40),
+ )
+ self.assertEqual(
+ outputs["P4"].shape,
+ (None, input_size // 2**4, input_size // 2**4, 112),
+ )
+ self.assertEqual(
+ outputs["P5"].shape,
+ (None, input_size // 2**5, input_size // 2**5, 1280),
+ )
+
+ @parameterized.named_parameters(
+ ("one_channel", 1),
+ ("four_channels", 4),
+ )
+ def test_application_variable_input_channels(self, num_channels):
+ model = EfficientNetLiteBackbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=True,
+ input_shape=(None, None, num_channels),
+ )
+ self.assertEqual(model.output_shape, (None, None, None, 1280))
diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py
index 794687b9c7..20df5826f0 100644
--- a/keras_cv/models/legacy/__init__.py
+++ b/keras_cv/models/legacy/__init__.py
@@ -24,11 +24,6 @@
from keras_cv.models.legacy.convnext import ConvNeXtXLarge
from keras_cv.models.legacy.darknet import DarkNet21
from keras_cv.models.legacy.darknet import DarkNet53
-from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB0
-from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB1
-from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB2
-from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB3
-from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB4
from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0
from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1
from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2
diff --git a/keras_cv/models/legacy/efficientnet_lite.py b/keras_cv/models/legacy/efficientnet_lite.py
deleted file mode 100644
index a2ae8d3606..0000000000
--- a/keras_cv/models/legacy/efficientnet_lite.py
+++ /dev/null
@@ -1,678 +0,0 @@
-# Copyright 2023 The KerasCV Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""EfficientNet Lite models for Keras.
-
-Reference:
- - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
- (ICML 2019)
- - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
-""" # noqa: E501
-
-import copy
-import math
-
-import tensorflow as tf
-from keras import backend
-from keras import layers
-from tensorflow import keras
-
-from keras_cv.models.legacy import utils
-from keras_cv.models.legacy.weights import parse_weights
-from keras_cv.models.utils import correct_pad_downsample
-
-DEFAULT_BLOCKS_ARGS = [
- {
- "kernel_size": 3,
- "repeats": 1,
- "filters_in": 32,
- "filters_out": 16,
- "expand_ratio": 1,
- "id_skip": True,
- "strides": 1,
- },
- {
- "kernel_size": 3,
- "repeats": 2,
- "filters_in": 16,
- "filters_out": 24,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- },
- {
- "kernel_size": 5,
- "repeats": 2,
- "filters_in": 24,
- "filters_out": 40,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- },
- {
- "kernel_size": 3,
- "repeats": 3,
- "filters_in": 40,
- "filters_out": 80,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- },
- {
- "kernel_size": 5,
- "repeats": 3,
- "filters_in": 80,
- "filters_out": 112,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 1,
- },
- {
- "kernel_size": 5,
- "repeats": 4,
- "filters_in": 112,
- "filters_out": 192,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- },
- {
- "kernel_size": 3,
- "repeats": 1,
- "filters_in": 192,
- "filters_out": 320,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 1,
- },
-]
-CONV_KERNEL_INITIALIZER = {
- "class_name": "VarianceScaling",
- "config": {
- "scale": 2.0,
- "mode": "fan_out",
- "distribution": "truncated_normal",
- },
-}
-
-DENSE_KERNEL_INITIALIZER = {
- "class_name": "VarianceScaling",
- "config": {
- "scale": 1.0 / 3.0,
- "mode": "fan_out",
- "distribution": "uniform",
- },
-}
-
-BASE_DOCSTRING = """Instantiates the {name} architecture.
-
- Reference:
- - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
- (ICML 2019)
-
- This function returns a Keras {name} model.
-
- For image classification use cases, see [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models).
-
- For transfer learning use cases, make sure to read the
- [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
-
- Args:
- include_rescaling: bool, whether to rescale the inputs. If set
- to `True`, inputs will be passed through a `Rescaling(1/255.0)`
- layer.
- include_top: bool, whether to include the fully-connected layer at the
- top of the network. If provided, `num_classes` must be provided.
- num_classes: optional int, number of classes to classify images into
- (only to be specified if `include_top` is `True`).
- weights: one of `None` (random initialization), a pretrained weight file
- path, or a reference to pre-trained weights (e.g.
- 'imagenet/classification')(see available pre-trained weights in
- weights.py)
- input_shape: optional shape tuple, defaults to (None, None, 3).
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
- to use as image input for the model.
- pooling: optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be the 4D tensor
- output of the last convolutional block.
- - `avg` means that global average pooling will be applied to the
- output of the last convolutional block, and thus the output of
- the model will be a 2D tensor.
- - `max` means that global max pooling will be applied.
- classifier_activation: A `str` or callable. The activation function to
- use on the "top" layer. Ignored unless `include_top=True`. Set
- `classifier_activation=None` to return the logits of the "top"
- layer.
- name: (Optional) name to pass to the model, defaults to "{name}".
-
- Returns:
- A `keras.Model` instance.
-""" # noqa: E501
-
-BN_AXIS = 3
-
-
-def round_filters(filters, depth_divisor, width_coefficient):
- """Round number of filters based on depth multiplier."""
- filters *= width_coefficient
- new_filters = max(
- depth_divisor,
- int(filters + depth_divisor / 2) // depth_divisor * depth_divisor,
- )
- # Make sure that round down does not go down by more than 10%.
- if new_filters < 0.9 * filters:
- new_filters += depth_divisor
- return int(new_filters)
-
-
-def round_repeats(repeats, depth_coefficient):
- """Round number of repeats based on depth multiplier."""
- return int(math.ceil(depth_coefficient * repeats))
-
-
-def apply_efficient_net_lite_block(
- inputs,
- activation="relu6",
- drop_rate=0.0,
- name=None,
- filters_in=32,
- filters_out=16,
- kernel_size=3,
- strides=1,
- expand_ratio=1,
- id_skip=True,
-):
- """An inverted residual block, without SE phase.
-
- Args:
- inputs: input tensor.
- activation: activation function.
- drop_rate: float between 0 and 1, fraction of the input units to drop.
- name: string, block label.
- filters_in: integer, the number of input filters.
- filters_out: integer, the number of output filters.
- kernel_size: integer, the dimension of the convolution window.
- strides: integer, the stride of the convolution.
- expand_ratio: integer, scaling coefficient for the input filters.
- id_skip: boolean.
-
- Returns:
- output tensor for the block.
- """
- if name is None:
- name = f"block_{backend.get_uid('block_')}_"
-
- # Expansion phase
- filters = filters_in * expand_ratio
- if expand_ratio != 1:
- x = layers.Conv2D(
- filters,
- 1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name=name + "expand_conv",
- )(inputs)
- x = layers.BatchNormalization(axis=BN_AXIS, name=name + "expand_bn")(x)
- x = layers.Activation(activation, name=name + "expand_activation")(x)
- else:
- x = inputs
-
- # Depthwise Convolution
- if strides == 2:
- x = layers.ZeroPadding2D(
- padding=correct_pad_downsample(x, kernel_size),
- name=name + "dwconv_pad",
- )(x)
- conv_pad = "valid"
- else:
- conv_pad = "same"
- x = layers.DepthwiseConv2D(
- kernel_size,
- strides=strides,
- padding=conv_pad,
- use_bias=False,
- depthwise_initializer=CONV_KERNEL_INITIALIZER,
- name=name + "dwconv",
- )(x)
- x = layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x)
- x = layers.Activation(activation, name=name + "activation")(x)
-
- # Skip SE block
- # Output phase
- x = layers.Conv2D(
- filters_out,
- 1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name=name + "project_conv",
- )(x)
- x = layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")(x)
- if id_skip and strides == 1 and filters_in == filters_out:
- if drop_rate > 0:
- x = layers.Dropout(
- drop_rate, noise_shape=(None, 1, 1, 1), name=name + "drop"
- )(x)
- x = layers.add([x, inputs], name=name + "add")
- return x
-
-
-@keras.utils.register_keras_serializable(package="keras_cv.models")
-class EfficientNetLite(keras.Model):
- """Instantiates the EfficientNetLite architecture using given scaling
- coefficients.
-
- Args:
- include_rescaling: whether to rescale the inputs. If set to True,
- inputs will be passed through a `Rescaling(1/255.0)` layer.
- include_top: whether to include the fully-connected
- layer at the top of the network.
- width_coefficient: float, scaling coefficient for network width.
- depth_coefficient: float, scaling coefficient for network depth.
- default_size: integer, default input image size.
- dropout_rate: float, dropout rate before final classifier layer.
- drop_connect_rate: float, dropout rate at skip connections.
- depth_divisor: integer, a unit of network width.
- activation: activation function.
- blocks_args: list of dicts, parameters to construct block modules.
- model_name: string, model name.
- weights: one of `None` (random initialization),
- or the path to the weights file to be loaded.
- input_shape: optional shape tuple,
- It should have exactly 3 inputs channels.
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
- to use as image input for the model.
- pooling: optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional layer.
- - `avg` means that global average pooling
- will be applied to the output of the
- last convolutional layer, and thus
- the output of the model will be a 2D tensor.
- - `max` means that global max pooling will
- be applied.
- num_classes: optional number of classes to classify images
- into, only to be specified if `include_top` is True, and
- if no `weights` argument is specified.
- classifier_activation: A `str` or callable. The activation function to
- use on the "top" layer. Ignored unless `include_top=True`. Set
- `classifier_activation=None` to return the logits of the "top"
- layer.
-
- Returns:
- A `keras.Model` instance.
-
- Raises:
- ValueError: if `blocks_args` is invalid.
- ValueError: in case of invalid argument for `weights`,
- or invalid input shape.
- ValueError: if `classifier_activation` is not `softmax` or `None`
- when using a pretrained top layer.
- """
-
- def __init__(
- self,
- include_rescaling,
- include_top,
- width_coefficient,
- depth_coefficient,
- default_size,
- dropout_rate=0.2,
- drop_connect_rate=0.2,
- depth_divisor=8,
- activation="relu6",
- blocks_args=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- num_classes=None,
- classifier_activation="softmax",
- **kwargs,
- ):
- if blocks_args is None:
- blocks_args = DEFAULT_BLOCKS_ARGS
- if not isinstance(blocks_args, list):
- raise ValueError(
- "The `blocks_args` argument should be either `None` or valid"
- "list of dicts for building blocks. "
- f"Received: blocks_args={blocks_args}"
- )
- intact_blocks_args = copy.deepcopy(blocks_args) # for configs
- blocks_args = copy.deepcopy(blocks_args)
-
- if weights and not tf.io.gfile.exists(weights):
- raise ValueError(
- "The `weights` argument should be either `None` or the path to "
- "the weights file to be loaded. "
- f"Weights file not found at location: {weights}"
- )
-
- if include_top and not num_classes:
- raise ValueError(
- "If `include_top` is True, you should specify `num_classes`. "
- f"Received: num_classes={num_classes}"
- )
-
- if include_top and pooling:
- raise ValueError(
- f"`pooling` must be `None` when `include_top=True`."
- f"Received pooling={pooling} and include_top={include_top}. "
- )
-
- img_input = utils.parse_model_inputs(input_shape, input_tensor)
-
- # Build stem
- x = img_input
-
- if include_rescaling:
- # Use common rescaling strategy across keras_cv
- x = layers.Rescaling(1.0 / 255.0)(x)
-
- x = layers.ZeroPadding2D(
- padding=correct_pad_downsample(x, 3), name="stem_conv_pad"
- )(x)
- x = layers.Conv2D(
- 32,
- 3,
- strides=2,
- padding="valid",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name="stem_conv",
- )(x)
- x = layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x)
- x = layers.Activation(activation, name="stem_activation")(x)
-
- # Build blocks
- b = 0
- blocks = float(sum(args["repeats"] for args in blocks_args))
-
- for i, args in enumerate(blocks_args):
- assert args["repeats"] > 0
- # Update block input and output filters based on depth multiplier.
- args["filters_in"] = round_filters(
- filters=args["filters_in"],
- width_coefficient=width_coefficient,
- depth_divisor=depth_divisor,
- )
- args["filters_out"] = round_filters(
- filters=args["filters_out"],
- width_coefficient=width_coefficient,
- depth_divisor=depth_divisor,
- )
-
- if i == 0 or i == (len(blocks_args) - 1):
- repeats = args.pop("repeats")
- else:
- repeats = round_repeats(
- repeats=args.pop("repeats"),
- depth_coefficient=depth_coefficient,
- )
-
- for j in range(repeats):
- # The first block needs to take care of stride and filter size
- # increase.
- if j > 0:
- args["strides"] = 1
- args["filters_in"] = args["filters_out"]
- x = apply_efficient_net_lite_block(
- x,
- activation=activation,
- drop_rate=drop_connect_rate * b / blocks,
- name="block{}{}_".format(i + 1, chr(j + 97)),
- **args,
- )
-
- b += 1
-
- # Build top
- x = layers.Conv2D(
- 1280,
- 1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name="top_conv",
- )(x)
- x = layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x)
- x = layers.Activation(activation, name="top_activation")(x)
-
- if include_top:
- x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
- if dropout_rate > 0:
- x = layers.Dropout(dropout_rate, name="top_dropout")(x)
- x = layers.Dense(
- num_classes,
- activation=classifier_activation,
- kernel_initializer=DENSE_KERNEL_INITIALIZER,
- name="predictions",
- )(x)
- else:
- if pooling == "avg":
- x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
- elif pooling == "max":
- x = layers.GlobalMaxPooling2D(name="max_pool")(x)
-
- inputs = img_input
-
- # Create model.
- super().__init__(inputs=inputs, outputs=x, **kwargs)
-
- # Load weights.
- if weights is not None:
- self.load_weights(weights)
-
- self.include_rescaling = include_rescaling
- self.include_top = include_top
- self.width_coefficient = width_coefficient
- self.depth_coefficient = depth_coefficient
- self.default_size = default_size
- self.dropout_rate = dropout_rate
- self.drop_connect_rate = drop_connect_rate
- self.depth_divisor = depth_divisor
- self.activation = activation
- self.blocks_args = intact_blocks_args
- self.input_tensor = input_tensor
- self.pooling = pooling
- self.num_classes = num_classes
- self.classifier_activation = classifier_activation
-
- def get_config(self):
- return {
- "include_rescaling": self.include_rescaling,
- "include_top": self.include_top,
- "width_coefficient": self.width_coefficient,
- "depth_coefficient": self.depth_coefficient,
- "default_size": self.default_size,
- "dropout_rate": self.dropout_rate,
- "drop_connect_rate": self.drop_connect_rate,
- "depth_divisor": self.depth_divisor,
- "activation": self.activation,
- "blocks_args": self.blocks_args,
- # Remove batch dimension from `input_shape`
- "input_shape": self.input_shape[1:],
- "input_tensor": self.input_tensor,
- "pooling": self.pooling,
- "num_classes": self.num_classes,
- "classifier_activation": self.classifier_activation,
- "name": self.name,
- "trainable": self.trainable,
- }
-
- @classmethod
- def from_config(cls, config):
- return cls(**config)
-
-
-def EfficientNetLiteB0(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- **kwargs,
-):
- return EfficientNetLite(
- include_rescaling,
- include_top,
- width_coefficient=1.0,
- depth_coefficient=1.0,
- default_size=224,
- dropout_rate=0.2,
- name="efficientnetliteb0",
- weights=parse_weights(weights, include_top, "efficientnetliteb0"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetLiteB1(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- **kwargs,
-):
- return EfficientNetLite(
- include_rescaling,
- include_top,
- width_coefficient=1.0,
- depth_coefficient=1.1,
- default_size=240,
- dropout_rate=0.2,
- name="efficientnetliteb1",
- weights=parse_weights(weights, include_top, "efficientnetliteb1"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetLiteB2(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- **kwargs,
-):
- return EfficientNetLite(
- include_rescaling,
- include_top,
- width_coefficient=1.1,
- depth_coefficient=1.2,
- default_size=260,
- dropout_rate=0.3,
- name="efficientnetliteb2",
- weights=parse_weights(weights, include_top, "efficientnetliteb2"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetLiteB3(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- **kwargs,
-):
- return EfficientNetLite(
- include_rescaling,
- include_top,
- width_coefficient=1.2,
- depth_coefficient=1.4,
- default_size=280,
- dropout_rate=0.3,
- name="efficientnetliteb3",
- weights=parse_weights(weights, include_top, "efficientnetliteb3"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetLiteB4(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- **kwargs,
-):
- return EfficientNetLite(
- include_rescaling,
- include_top,
- width_coefficient=1.4,
- depth_coefficient=1.8,
- default_size=300,
- dropout_rate=0.3,
- name="efficientnetliteb4",
- weights=parse_weights(weights, include_top, "efficientnetliteb4"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-EfficientNetLiteB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB0")
-EfficientNetLiteB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB1")
-EfficientNetLiteB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB2")
-EfficientNetLiteB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB3")
-EfficientNetLiteB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB4")
diff --git a/keras_cv/models/legacy/efficientnet_lite_test.py b/keras_cv/models/legacy/efficientnet_lite_test.py
deleted file mode 100644
index daa1d0e2c0..0000000000
--- a/keras_cv/models/legacy/efficientnet_lite_test.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright 2023 The KerasCV Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from absl.testing import parameterized
-
-from keras_cv.models.legacy import efficientnet_lite
-from keras_cv.tests.test_case import TestCase
-
-from .models_test import ModelsTest
-
-MODEL_LIST = [
- (efficientnet_lite.EfficientNetLiteB0, 1280, {}),
-]
-
-"""
-Below are other configurations that we omit from our CI but that can/should
-be tested manually when making changes to this model.
-(efficientnet_lite.EfficientNetLiteB1, 1280, {}),
-(efficientnet_lite.EfficientNetLiteB2, 1280, {}),
-(efficientnet_lite.EfficientNetLiteB3, 1280, {}),
-(efficientnet_lite.EfficientNetLiteB4, 1280, {}),
-"""
-
-
-class EfficientNetLiteTest(ModelsTest, TestCase):
- @parameterized.parameters(*MODEL_LIST)
- def test_application_base(self, app, _, args):
- super()._test_application_base(app, _, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_with_rescaling(self, app, last_dim, args):
- super()._test_application_with_rescaling(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_pooling(self, app, last_dim, args):
- super()._test_application_pooling(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_variable_input_channels(self, app, last_dim, args):
- super()._test_application_variable_input_channels(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_model_can_be_used_as_backbone(self, app, last_dim, args):
- super()._test_model_can_be_used_as_backbone(app, last_dim, args)
From 1602b17eed20b187e6bb0f4656f1fbeff57947b2 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Thu, 17 Aug 2023 20:47:24 +0530
Subject: [PATCH 06/17] Migrate Efficientnetv1 to Backbone (#1716)
* created new files
* moved old files to new one
* efficientnetv1 backbone, presets, and imports updated
* fix imports
* fix imports
* added preset unit test
* updated backbone
* fix init
* fix legacy init
* fix aliases presets
* fix backbone round filter argument
* fix depthwise conv
* fix conv kernel initializer
* fix depthwiseconv
* fix scope name match pattern error
* fix scope name
* fix block name
* remove block args preset
* remove model_name
* remove default_size from preset
* updated test cases
* updated docs
* fix id_skip
* fix test
* fix format
* reviewed comments
* fix format
* fix typo
* fix naming
* test with layer-matching
* fix typo
* fix format
* backbone test updated
* review changes
* port
* fix port
* fix port 2
* port: fix argument
* port: final fix
* port: docs typo
* port: update
* review comment
* format
---
keras_cv/models/__init__.py | 27 +
keras_cv/models/backbones/backbone_presets.py | 5 +
.../backbones/efficientnet_v1/__init__.py | 13 +
.../efficientnet_v1_aliases.py | 315 ++++++
.../efficientnet_v1_backbone.py | 454 +++++++++
.../efficientnet_v1_backbone_presets.py | 337 +++++++
.../efficientnet_v1_backbone_presets_test.py | 60 ++
.../efficientnet_v1_backbone_test.py | 198 ++++
keras_cv/models/legacy/__init__.py | 8 -
keras_cv/models/legacy/efficientnet_v1.py | 937 ------------------
.../models/legacy/efficientnet_v1_test.py | 58 --
11 files changed, 1409 insertions(+), 1003 deletions(-)
create mode 100644 keras_cv/models/backbones/efficientnet_v1/__init__.py
create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py
create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py
create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py
delete mode 100644 keras_cv/models/legacy/efficientnet_v1.py
delete mode 100644 keras_cv/models/legacy/efficientnet_v1_test.py
diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py
index 1861b49c03..4191c07575 100644
--- a/keras_cv/models/__init__.py
+++ b/keras_cv/models/__init__.py
@@ -61,6 +61,33 @@
from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501
EfficientNetLiteBackbone,
)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B1Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B2Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B3Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B4Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B5Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B6Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B7Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1Backbone,
+)
from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import (
EfficientNetV2B0Backbone,
)
diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py
index 3852b28877..614f85cd24 100644
--- a/keras_cv/models/backbones/backbone_presets.py
+++ b/keras_cv/models/backbones/backbone_presets.py
@@ -19,6 +19,9 @@
from keras_cv.models.backbones.efficientnet_lite import (
efficientnet_lite_backbone_presets,
)
+from keras_cv.models.backbones.efficientnet_v1 import (
+ efficientnet_v1_backbone_presets,
+)
from keras_cv.models.backbones.efficientnet_v2 import (
efficientnet_v2_backbone_presets,
)
@@ -32,6 +35,7 @@
**resnet_v2_backbone_presets.backbone_presets_no_weights,
**mobilenet_v3_backbone_presets.backbone_presets_no_weights,
**csp_darknet_backbone_presets.backbone_presets_no_weights,
+ **efficientnet_v1_backbone_presets.backbone_presets_no_weights,
**efficientnet_v2_backbone_presets.backbone_presets_no_weights,
**densenet_backbone_presets.backbone_presets_no_weights,
**efficientnet_lite_backbone_presets.backbone_presets_no_weights,
@@ -43,6 +47,7 @@
**resnet_v2_backbone_presets.backbone_presets_with_weights,
**mobilenet_v3_backbone_presets.backbone_presets_with_weights,
**csp_darknet_backbone_presets.backbone_presets_with_weights,
+ **efficientnet_v1_backbone_presets.backbone_presets_with_weights,
**efficientnet_v2_backbone_presets.backbone_presets_with_weights,
**densenet_backbone_presets.backbone_presets_with_weights,
**efficientnet_lite_backbone_presets.backbone_presets_with_weights,
diff --git a/keras_cv/models/backbones/efficientnet_v1/__init__.py b/keras_cv/models/backbones/efficientnet_v1/__init__.py
new file mode 100644
index 0000000000..3992ffb59a
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py
new file mode 100644
index 0000000000..587c0e70ff
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py
@@ -0,0 +1,315 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import (
+ EfficientNetV1Backbone,
+)
+from keras_cv.utils.python_utils import classproperty
+
+ALIAS_DOCSTRING = """Instantiates the {name} architecture.
+
+ Reference:
+ - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+ (ICML 2019)
+
+ Args:
+    include_rescaling: bool, whether to rescale the inputs. If set
+        to `True`, inputs will be passed through a `Rescaling(1/255.0)`
+        layer. Defaults to `True`.
+ input_shape: optional shape tuple, defaults to (None, None, 3).
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+ to use as image input for the model.
+""" # noqa: E501
+
+
+class EfficientNetV1B0Backbone(EfficientNetV1Backbone):
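+    # The alias overrides `__new__` so that constructing it simply returns
+    # an `EfficientNetV1Backbone` built from the matching preset config.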
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b0", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B1Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b1", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B2Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b2", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B3Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b3", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B4Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b4", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B5Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b5", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B6Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b6", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+class EfficientNetV1B7Backbone(EfficientNetV1Backbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return EfficientNetV1Backbone.from_preset("efficientnetv1_b7", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return {}
+
+
+setattr(
+ EfficientNetV1B0Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B0"),
+)
+setattr(
+ EfficientNetV1B1Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B1"),
+)
+setattr(
+ EfficientNetV1B2Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B2"),
+)
+setattr(
+ EfficientNetV1B3Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B3"),
+)
+setattr(
+ EfficientNetV1B4Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B4"),
+)
+setattr(
+ EfficientNetV1B5Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B5"),
+)
+setattr(
+ EfficientNetV1B6Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B6"),
+)
+setattr(
+ EfficientNetV1B7Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV1B7"),
+)
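+
+
+# A minimal usage sketch for the aliases above (illustrative; no pretrained
+# weights are registered for these presets yet, so the backbone below is
+# randomly initialized):
+#
+# import numpy as np
+# from keras_cv.models import EfficientNetV1B0Backbone
+#
+# backbone = EfficientNetV1B0Backbone(include_rescaling=True)
+# features = backbone(np.ones((1, 224, 224, 3)))  # -> (1, 7, 7, 1280)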
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
new file mode 100644
index 0000000000..cc39d3d31c
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
@@ -0,0 +1,454 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import math
+
+from keras_cv.backend import keras
+from keras_cv.models import utils
+from keras_cv.models.backbones.backbone import Backbone
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone_presets import ( # noqa: E501
+ backbone_presets,
+)
+from keras_cv.utils.python_utils import classproperty
+
+
+@keras.saving.register_keras_serializable(package="keras_cv.models")
+class EfficientNetV1Backbone(Backbone):
+ """Instantiates the EfficientNetV1 architecture.
+
+ Reference:
+ - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+ (ICML 2019)
+ - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py)
+
+ Args:
+ include_rescaling: bool, whether to rescale the inputs. If set to
+ True, inputs will be passed through a `Rescaling(1/255.0)` layer.
+ width_coefficient: float, scaling coefficient for network width.
+ depth_coefficient: float, scaling coefficient for network depth.
+ dropout_rate: float, dropout rate before final classifier layer.
+        drop_connect_rate: float, dropout rate at skip connections. Defaults
+            to 0.2.
+        depth_divisor: integer, a unit of network width. Defaults to 8.
+ activation: activation function to use between each convolutional layer.
+ input_shape: optional shape tuple, it should have exactly 3 input
+ channels.
+        input_tensor: optional Keras tensor (i.e. output of
+            `keras.layers.Input()`) to use as image input for the model.
+ stackwise_kernel_sizes: list of ints, the kernel sizes used for each
+ conv block.
+ stackwise_num_repeats: list of ints, number of times to repeat each
+ conv block.
+ stackwise_input_filters: list of ints, number of input filters for
+ each conv block.
+        stackwise_output_filters: list of ints, number of output filters for
+            each conv block.
+        stackwise_expansion_ratios: list of floats, the expansion ratio used
+            in the expand phase of each conv block.
+        stackwise_strides: list of ints, the stride for each conv block.
+        stackwise_squeeze_and_excite_ratios: list of floats, the squeeze and
+            excite ratios passed to the squeeze and excitation blocks.
+
+ Usage:
+ ```python
+ # Construct an EfficientNetV1 from a preset:
+ efficientnet = keras_cv.models.EfficientNetV1Backbone.from_preset(
+ "efficientnetv1_b0"
+ )
+ images = np.ones((1, 256, 256, 3))
+ outputs = efficientnet.predict(images)
+
+ # Alternatively, you can also customize the EfficientNetV1 architecture:
+ model = EfficientNetV1Backbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ stackwise_squeeze_and_excite_ratios=[
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=False,
+ )
+ images = np.ones((1, 256, 256, 3))
+    outputs = model.predict(images)
+ ```
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ *,
+ include_rescaling,
+ width_coefficient,
+ depth_coefficient,
+ stackwise_kernel_sizes,
+ stackwise_num_repeats,
+ stackwise_input_filters,
+ stackwise_output_filters,
+ stackwise_expansion_ratios,
+ stackwise_strides,
+ stackwise_squeeze_and_excite_ratios,
+ dropout_rate=0.2,
+ drop_connect_rate=0.2,
+ depth_divisor=8,
+ input_shape=(None, None, 3),
+ input_tensor=None,
+ activation="swish",
+ **kwargs,
+ ):
+ img_input = utils.parse_model_inputs(input_shape, input_tensor)
+
+ x = img_input
+
+ if include_rescaling:
+ # Use common rescaling strategy across keras_cv
+ x = keras.layers.Rescaling(1.0 / 255.0)(x)
+
+ x = keras.layers.ZeroPadding2D(
+ padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad"
+ )(x)
+
+ # Build stem
+ stem_filters = round_filters(
+ filters=stackwise_input_filters[0],
+ width_coefficient=width_coefficient,
+ divisor=depth_divisor,
+ )
+ x = keras.layers.Conv2D(
+ filters=stem_filters,
+ kernel_size=3,
+ strides=2,
+ padding="valid",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name="stem_conv",
+ )(x)
+ x = keras.layers.BatchNormalization(
+ axis=3,
+ name="stem_bn",
+ )(x)
+ x = keras.layers.Activation(activation, name="stem_activation")(x)
+
+ # Build blocks
+ block_id = 0
+ blocks = float(sum(stackwise_num_repeats))
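+        # Stochastic depth: the effective drop rate grows linearly with
+        # block index, from 0 for the first block up to (nearly)
+        # `drop_connect_rate` for the last one.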
+
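+        # Record the layer feeding each downsampling stage so the backbone
+        # can later be queried as a feature pyramid via
+        # `pyramid_level_inputs`.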
+ pyramid_level_inputs = []
+ for i in range(len(stackwise_kernel_sizes)):
+ num_repeats = stackwise_num_repeats[i]
+ input_filters = stackwise_input_filters[i]
+ output_filters = stackwise_output_filters[i]
+
+ # Update block input and output filters based on depth multiplier.
+ input_filters = round_filters(
+ filters=input_filters,
+ width_coefficient=width_coefficient,
+ divisor=depth_divisor,
+ )
+ output_filters = round_filters(
+ filters=output_filters,
+ width_coefficient=width_coefficient,
+ divisor=depth_divisor,
+ )
+
+ repeats = round_repeats(
+ repeats=num_repeats,
+ depth_coefficient=depth_coefficient,
+ )
+ strides = stackwise_strides[i]
+ squeeze_and_excite_ratio = stackwise_squeeze_and_excite_ratios[i]
+
+ for j in range(repeats):
+ # The first block needs to take care of stride and filter size
+ # increase.
+ if j > 0:
+ strides = 1
+ input_filters = output_filters
+
+ if strides != 1:
+ pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+ # 97 is the start of the lowercase alphabet.
+ letter_identifier = chr(j + 97)
+ x = apply_efficientnet_block(
+ inputs=x,
+ filters_in=input_filters,
+ filters_out=output_filters,
+ kernel_size=stackwise_kernel_sizes[i],
+ strides=strides,
+ expand_ratio=stackwise_expansion_ratios[i],
+ se_ratio=squeeze_and_excite_ratio,
+ activation=activation,
+ dropout_rate=drop_connect_rate * block_id / blocks,
+ name="block{}{}_".format(i + 1, letter_identifier),
+ )
+ block_id += 1
+
+ # Build top
+ top_filters = round_filters(
+ filters=1280,
+ width_coefficient=width_coefficient,
+ divisor=depth_divisor,
+ )
+
+ x = keras.layers.Conv2D(
+ filters=top_filters,
+ kernel_size=1,
+ padding="same",
+ strides=1,
+ kernel_initializer=conv_kernel_initializer(),
+ use_bias=False,
+ name="top_conv",
+ )(x)
+ x = keras.layers.BatchNormalization(
+ axis=3,
+ name="top_bn",
+ )(x)
+ x = keras.layers.Activation(
+ activation=activation, name="top_activation"
+ )(x)
+
+ pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+ # Create model.
+ super().__init__(inputs=img_input, outputs=x, **kwargs)
+
+ self.include_rescaling = include_rescaling
+ self.width_coefficient = width_coefficient
+ self.depth_coefficient = depth_coefficient
+ self.dropout_rate = dropout_rate
+ self.drop_connect_rate = drop_connect_rate
+ self.depth_divisor = depth_divisor
+ self.activation = activation
+ self.input_tensor = input_tensor
+ self.pyramid_level_inputs = {
+ f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs)
+ }
+ self.stackwise_kernel_sizes = stackwise_kernel_sizes
+ self.stackwise_num_repeats = stackwise_num_repeats
+ self.stackwise_input_filters = stackwise_input_filters
+ self.stackwise_output_filters = stackwise_output_filters
+ self.stackwise_expansion_ratios = stackwise_expansion_ratios
+ self.stackwise_strides = stackwise_strides
+ self.stackwise_squeeze_and_excite_ratios = (
+ stackwise_squeeze_and_excite_ratios
+ )
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "include_rescaling": self.include_rescaling,
+ "width_coefficient": self.width_coefficient,
+ "depth_coefficient": self.depth_coefficient,
+ "dropout_rate": self.dropout_rate,
+ "drop_connect_rate": self.drop_connect_rate,
+ "depth_divisor": self.depth_divisor,
+ "activation": self.activation,
+ "input_tensor": self.input_tensor,
+ "input_shape": self.input_shape[1:],
+ "trainable": self.trainable,
+ "stackwise_kernel_sizes": self.stackwise_kernel_sizes,
+ "stackwise_num_repeats": self.stackwise_num_repeats,
+ "stackwise_input_filters": self.stackwise_input_filters,
+ "stackwise_output_filters": self.stackwise_output_filters,
+ "stackwise_expansion_ratios": self.stackwise_expansion_ratios,
+ "stackwise_strides": self.stackwise_strides,
+ "stackwise_squeeze_and_excite_ratios": (
+ self.stackwise_squeeze_and_excite_ratios
+ ),
+ }
+ )
+ return config
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return copy.deepcopy(backbone_presets)
+
+
+def conv_kernel_initializer(scale=2.0):
+ return keras.initializers.VarianceScaling(
+ scale=scale, mode="fan_out", distribution="truncated_normal"
+ )
+
+
+def round_filters(filters, width_coefficient, divisor):
+ """Round number of filters based on depth multiplier.
+
+ Args:
+ filters: int, number of filters for Conv layer
+ width_coefficient: float, denotes the scaling coefficient of network
+ width
+ divisor: int, a unit of network width
+
+ Returns:
+ int, new rounded filters value for Conv layer
+ """
+ filters *= width_coefficient
+ new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
+ # Make sure that round down does not go down by more than 10%.
+ if new_filters < 0.9 * filters:
+ new_filters += divisor
+ return int(new_filters)
+
+
+def round_repeats(repeats, depth_coefficient):
+ """Round number of repeats based on depth multiplier.
+
+ Args:
+ repeats: int, number of repeats of efficientnet block
+ depth_coefficient: float, denotes the scaling coefficient of network
+ depth
+
+ Returns:
+ int, rounded repeats
+ """
+ return int(math.ceil(depth_coefficient * repeats))
+
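+# Worked examples for the rounding helpers above (illustrative values, not
+# part of the library):
+#
+#   round_filters(40, width_coefficient=1.2, divisor=8) == 48
+#       # 40 * 1.2 = 48.0, already a multiple of 8
+#   round_filters(32, width_coefficient=1.1, divisor=8) == 32
+#       # 32 * 1.1 = 35.2 snaps down to 32, which is within 10% of 35.2
+#   round_repeats(2, depth_coefficient=1.1) == 3
+#       # ceil(1.1 * 2) = ceil(2.2) = 3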
+
+def apply_efficientnet_block(
+ inputs,
+ filters_in=32,
+ filters_out=16,
+ kernel_size=3,
+ strides=1,
+ activation="swish",
+ expand_ratio=1,
+ se_ratio=0.0,
+ dropout_rate=0.0,
+ name="",
+):
+ """An inverted residual block.
+
+ Args:
+ inputs: Tensor, The input tensor of the block
+ filters_in: integer, the number of input filters.
+ filters_out: integer, the number of output filters.
+ kernel_size: integer, the dimension of the convolution window.
+ strides: integer, the stride of the convolution.
+ activation: activation function to use between each convolutional layer.
+ expand_ratio: integer, scaling coefficient for the input filters.
+ se_ratio: float between 0 and 1, fraction to squeeze the input filters.
+ dropout_rate: float between 0 and 1, fraction of the input units to drop.
+ name: string, block label.
+
+ Returns:
+ output tensor for the block.
+ """ # noqa: E501
+ filters = filters_in * expand_ratio
+ if expand_ratio != 1:
+ x = keras.layers.Conv2D(
+ filters=filters,
+ kernel_size=1,
+ strides=1,
+ padding="same",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "expand_conv",
+ )(inputs)
+ x = keras.layers.BatchNormalization(
+ axis=3,
+ name=name + "expand_bn",
+ )(x)
+ x = keras.layers.Activation(
+ activation, name=name + "expand_activation"
+ )(x)
+ else:
+ x = inputs
+
+ # Depthwise Convolution
+ if strides == 2:
+ x = keras.layers.ZeroPadding2D(
+ padding=utils.correct_pad_downsample(x, kernel_size),
+ name=name + "dwconv_pad",
+ )(x)
+ conv_pad = "valid"
+ else:
+ conv_pad = "same"
+
+ x = keras.layers.DepthwiseConv2D(
+ kernel_size=kernel_size,
+ strides=strides,
+ padding=conv_pad,
+ use_bias=False,
+ depthwise_initializer=conv_kernel_initializer(),
+ name=name + "dwconv",
+ )(x)
+ x = keras.layers.BatchNormalization(
+ axis=3,
+ name=name + "dwconv_bn",
+ )(x)
+ x = keras.layers.Activation(activation, name=name + "dwconv_activation")(x)
+
+ # Squeeze and Excitation phase
+ if 0 < se_ratio <= 1:
+ filters_se = max(1, int(filters_in * se_ratio))
+ se = keras.layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
+ se_shape = (1, 1, filters)
+ se = keras.layers.Reshape(se_shape, name=name + "se_reshape")(se)
+ se = keras.layers.Conv2D(
+ filters_se,
+ 1,
+ padding="same",
+ activation=activation,
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "se_reduce",
+ )(se)
+ se = keras.layers.Conv2D(
+ filters,
+ 1,
+ padding="same",
+ activation="sigmoid",
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "se_expand",
+ )(se)
+ x = keras.layers.multiply([x, se], name=name + "se_excite")
+
+ # Output phase
+ x = keras.layers.Conv2D(
+ filters=filters_out,
+ kernel_size=1,
+ strides=1,
+ padding="same",
+ use_bias=False,
+ kernel_initializer=conv_kernel_initializer(),
+ name=name + "project",
+ )(x)
+ x = keras.layers.BatchNormalization(
+ axis=3,
+ name=name + "project_bn",
+ )(x)
+ x = keras.layers.Activation(activation, name=name + "project_activation")(x)
+
+ if strides == 1 and filters_in == filters_out:
+ if dropout_rate > 0:
+ x = keras.layers.Dropout(
+ dropout_rate,
+ noise_shape=(None, 1, 1, 1),
+ name=name + "drop",
+ )(x)
+ x = keras.layers.add([x, inputs], name=name + "add")
+
+ return x
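+
+
+# Shape sketch for `apply_efficientnet_block` (assumed toy values, not part
+# of the library API): a stride-2 block pads, downsamples, and projects
+# `filters_in` channels to `filters_out`; the residual add only fires when
+# strides == 1 and filters_in == filters_out.
+#
+# inputs = keras.layers.Input(shape=(64, 64, 32))
+# outputs = apply_efficientnet_block(
+#     inputs, filters_in=32, filters_out=16, kernel_size=3, strides=2,
+#     expand_ratio=6, se_ratio=0.25, name="demo_",
+# )
+# assert tuple(outputs.shape) == (None, 32, 32, 16)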
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py
new file mode 100644
index 0000000000..a2aac81d26
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py
@@ -0,0 +1,337 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""EfficientNetV1 model preset configurations."""
+
+backbone_presets_no_weights = {
+ "efficientnetv1_b0": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.0` and `depth_coefficient=1.0`."
+ ),
+ "params": 4050716,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.0,
+ "depth_coefficient": 1.0,
+ "dropout_rate": 0.2,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b1": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.0` and `depth_coefficient=1.1`."
+ ),
+ "params": 6576704,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.0,
+ "depth_coefficient": 1.1,
+ "dropout_rate": 0.2,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b2": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.1` and `depth_coefficient=1.2`."
+ ),
+ "params": 7770034,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.1,
+ "depth_coefficient": 1.2,
+ "dropout_rate": 0.3,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b3": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.2` and `depth_coefficient=1.4`."
+ ),
+ "params": 10785960,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.2,
+ "depth_coefficient": 1.4,
+ "dropout_rate": 0.3,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b4": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.4` and `depth_coefficient=1.8`."
+ ),
+ "params": 17676984,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.4,
+ "depth_coefficient": 1.8,
+ "dropout_rate": 0.4,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b5": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.6` and `depth_coefficient=2.2`."
+ ),
+ "params": 28517360,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.6,
+ "depth_coefficient": 2.2,
+ "dropout_rate": 0.4,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b6": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=1.8` and `depth_coefficient=2.6`."
+ ),
+ "params": 40965800,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 1.8,
+ "depth_coefficient": 2.6,
+ "dropout_rate": 0.5,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+ "efficientnetv1_b7": {
+ "metadata": {
+ "description": (
+ "EfficientNet B-style architecture with 7 "
+ "convolutional blocks. This B-style model has "
+ "`width_coefficient=2.0` and `depth_coefficient=3.1`."
+ ),
+ "params": 64105488,
+ "official_name": "EfficientNetV1",
+ "path": "efficientnetv1",
+ },
+ "class_name": "keras_cv.models>EfficientNetV1Backbone",
+ "config": {
+ "width_coefficient": 2.0,
+ "depth_coefficient": 3.1,
+ "dropout_rate": 0.5,
+ "drop_connect_rate": 0.2,
+ "depth_divisor": 8,
+ "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+ "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+ "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+ "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+ "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+ "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+ "stackwise_squeeze_and_excite_ratios": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ "include_rescaling": True,
+ "input_shape": (None, None, 3),
+ "input_tensor": None,
+ "activation": "swish",
+ },
+ },
+}
+
+backbone_presets_with_weights = {}
+
+backbone_presets = {
+ **backbone_presets_no_weights,
+ **backbone_presets_with_weights,
+}
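+
+# Usage sketch: any key above is a valid argument to
+# `EfficientNetV1Backbone.from_preset` (no weights are attached to these
+# presets, so the resulting backbone is randomly initialized):
+#
+# from keras_cv.models import EfficientNetV1Backbone
+# backbone = EfficientNetV1Backbone.from_preset("efficientnetv1_b3")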
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
new file mode 100644
index 0000000000..fd73068311
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
@@ -0,0 +1,60 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import (
+ EfficientNetV1Backbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+@pytest.mark.extra_large
+class EfficientNetV1PresetFullTest(TestCase):
+ """
+    Test the full enumeration of our presets.
+    This tests every preset for EfficientNetV1 and is only run manually.
+ Run with:
+ `pytest keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py --run_extra_large`
+ """ # noqa: E501
+
+ @parameterized.named_parameters(
+ *[(preset, preset) for preset in EfficientNetV1Backbone.presets]
+ )
+ def test_load_efficientnet(self, preset):
+ input_data = np.ones(shape=(2, 224, 224, 3))
+ model = EfficientNetV1Backbone.from_preset(preset)
+ model(input_data)
+
+ def test_efficientnet_feature_extractor(self):
+ model = EfficientNetV1B0Backbone(
+ include_rescaling=False,
+ input_shape=[256, 256, 3],
+ )
+ levels = ["P3", "P4"]
+ layer_names = [model.pyramid_level_inputs[level] for level in levels]
+ backbone_model = get_feature_extractor(model, layer_names, levels)
+ inputs = keras.Input(shape=[256, 256, 3])
+ outputs = backbone_model(inputs)
+ self.assertLen(outputs, 2)
+        self.assertEqual(list(outputs.keys()), levels)
+        self.assertEqual(outputs["P3"].shape[:3], (None, 32, 32))
+        self.assertEqual(outputs["P4"].shape[:3], (None, 16, 16))
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py
new file mode 100644
index 0000000000..ac8a8dfa81
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py
@@ -0,0 +1,198 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+ EfficientNetV1B0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import (
+ EfficientNetV1Backbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+class EfficientNetV1BackboneTest(TestCase):
+ def setUp(self):
+ self.input_batch = np.ones(shape=(8, 224, 224, 3))
+
+ def test_valid_call(self):
+ model = EfficientNetV1Backbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ stackwise_squeeze_and_excite_ratios=[
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=False,
+ )
+ model(self.input_batch)
+
+ def test_valid_call_alias_model_with_rescaling(self):
+ model = EfficientNetV1B0Backbone(include_rescaling=True)
+ model(self.input_batch)
+
+ def test_valid_call_with_rescaling(self):
+ model = EfficientNetV1Backbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ stackwise_squeeze_and_excite_ratios=[
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=True,
+ )
+ model(self.input_batch)
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_model(self):
+ model = EfficientNetV1Backbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ stackwise_squeeze_and_excite_ratios=[
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+ include_rescaling=True,
+ )
+ model_output = model(self.input_batch)
+ save_path = os.path.join(
+ self.get_temp_dir(), "efficientnet_v1_backbone.keras"
+ )
+ model.save(save_path)
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+ self.assertIsInstance(restored_model, EfficientNetV1Backbone)
+
+ # Check that output matches.
+ restored_output = restored_model(self.input_batch)
+ self.assertAllClose(model_output, restored_output)
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_alias_model(self):
+ model = EfficientNetV1B0Backbone()
+ model_output = model(self.input_batch)
+ save_path = os.path.join(
+ self.get_temp_dir(), "efficientnet_v1_backbone.keras"
+ )
+ model.save(save_path)
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+        # Note that these aliases are serialized as the base class.
+ self.assertIsInstance(restored_model, EfficientNetV1Backbone)
+
+ # Check that output matches.
+ restored_output = restored_model(self.input_batch)
+ self.assertAllClose(model_output, restored_output)
+
+ def test_feature_pyramid_inputs(self):
+ model = EfficientNetV1B0Backbone()
+ backbone_model = get_feature_extractor(
+ model,
+ model.pyramid_level_inputs.values(),
+ model.pyramid_level_inputs.keys(),
+ )
+ input_size = 256
+ inputs = keras.Input(shape=[input_size, input_size, 3])
+ outputs = backbone_model(inputs)
+ levels = ["P1", "P2", "P3", "P4", "P5"]
+        self.assertEqual(list(outputs.keys()), levels)
+        self.assertEqual(
+            outputs["P1"].shape,
+            (None, input_size // 2**1, input_size // 2**1, 16),
+        )
+        self.assertEqual(
+            outputs["P2"].shape,
+            (None, input_size // 2**2, input_size // 2**2, 24),
+        )
+        self.assertEqual(
+            outputs["P3"].shape,
+            (None, input_size // 2**3, input_size // 2**3, 40),
+        )
+        self.assertEqual(
+            outputs["P4"].shape,
+            (None, input_size // 2**4, input_size // 2**4, 112),
+        )
+        self.assertEqual(
+            outputs["P5"].shape,
+            (None, input_size // 2**5, input_size // 2**5, 1280),
+        )
+
+ @parameterized.named_parameters(
+ ("one_channel", 1),
+ ("four_channels", 4),
+ )
+ def test_application_variable_input_channels(self, num_channels):
+ model = EfficientNetV1Backbone(
+ stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+ stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+ stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+ stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+ stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+ stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+ stackwise_squeeze_and_excite_ratios=[
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ ],
+ width_coefficient=1.0,
+ depth_coefficient=1.0,
+            include_rescaling=True,
+            # Use the parameterized channel count so this test actually
+            # exercises 1- and 4-channel inputs.
+            input_shape=(None, None, num_channels),
+        )
+ self.assertEqual(model.output_shape, (None, None, None, 1280))
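For reference, a minimal sketch of the API these tests exercise, assuming the
`EfficientNetV1B0Backbone` alias is exported from `keras_cv.models` as the
import paths in this file suggest:

    import numpy as np

    from keras_cv.models import EfficientNetV1B0Backbone

    # Build the B0 alias; include_rescaling adds a Rescaling(1/255) layer.
    backbone = EfficientNetV1B0Backbone(include_rescaling=True)

    # A 224x224 input downsamples by 2**5, giving a (1, 7, 7, 1280) feature
    # map, matching the pyramid shapes asserted above.
    features = backbone(np.ones((1, 224, 224, 3)))
    print(features.shape)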
diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py
index 20df5826f0..419ae34b31 100644
--- a/keras_cv/models/legacy/__init__.py
+++ b/keras_cv/models/legacy/__init__.py
@@ -24,14 +24,6 @@
from keras_cv.models.legacy.convnext import ConvNeXtXLarge
from keras_cv.models.legacy.darknet import DarkNet21
from keras_cv.models.legacy.darknet import DarkNet53
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB3
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB4
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB5
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB6
-from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB7
from keras_cv.models.legacy.mlp_mixer import MLPMixerB16
from keras_cv.models.legacy.mlp_mixer import MLPMixerB32
from keras_cv.models.legacy.mlp_mixer import MLPMixerL16
diff --git a/keras_cv/models/legacy/efficientnet_v1.py b/keras_cv/models/legacy/efficientnet_v1.py
deleted file mode 100644
index b91a63697a..0000000000
--- a/keras_cv/models/legacy/efficientnet_v1.py
+++ /dev/null
@@ -1,937 +0,0 @@
-# Copyright 2022 The KerasCV Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""EfficientNet models for Keras.
-
-Reference:
- - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
- (ICML 2019)
- - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py)
-""" # noqa: E501
-
-import copy
-import math
-
-import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import backend
-from tensorflow.keras import layers
-
-from keras_cv.models.legacy import utils
-from keras_cv.models.legacy.weights import parse_weights
-
-DEFAULT_BLOCKS_ARGS = [
- {
- "kernel_size": 3,
- "repeats": 1,
- "filters_in": 32,
- "filters_out": 16,
- "expand_ratio": 1,
- "id_skip": True,
- "strides": 1,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 3,
- "repeats": 2,
- "filters_in": 16,
- "filters_out": 24,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 5,
- "repeats": 2,
- "filters_in": 24,
- "filters_out": 40,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 3,
- "repeats": 3,
- "filters_in": 40,
- "filters_out": 80,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 5,
- "repeats": 3,
- "filters_in": 80,
- "filters_out": 112,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 1,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 5,
- "repeats": 4,
- "filters_in": 112,
- "filters_out": 192,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 2,
- "se_ratio": 0.25,
- },
- {
- "kernel_size": 3,
- "repeats": 1,
- "filters_in": 192,
- "filters_out": 320,
- "expand_ratio": 6,
- "id_skip": True,
- "strides": 1,
- "se_ratio": 0.25,
- },
-]
-
-CONV_KERNEL_INITIALIZER = {
- "class_name": "VarianceScaling",
- "config": {
- "scale": 2.0,
- "mode": "fan_out",
- "distribution": "truncated_normal",
- },
-}
-
-DENSE_KERNEL_INITIALIZER = {
- "class_name": "VarianceScaling",
- "config": {
- "scale": 1.0 / 3.0,
- "mode": "fan_out",
- "distribution": "uniform",
- },
-}
-
-BASE_DOCSTRING = """Instantiates the {name} architecture.
-
- Reference:
- - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
- (ICML 2019)
-
- This class represents a Keras image classification model.
-
- For image classification use cases, see
- [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models).
-
- For transfer learning use cases, make sure to read the
- [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
-
- Args:
- include_rescaling: bool, whether to rescale the inputs. If set to
- True, inputs will be passed through a `Rescaling(1/255.0)` layer.
- include_top: bool, Whether to include the fully-connected layer at the
- top of the network.
- weights: One of `None` (random initialization), or the path to the
- weights file to be loaded.
- input_shape: tuple, Optional shape tuple. It should have exactly 3
- inputs channels.
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to
- use as image input for the model.
- pooling: Optional pooling mode for feature extraction when `include_top`
- is `False`, defaults to None.
- - `None` means that the output of the model will be the 4D tensor
- output of the last convolutional layer.
- - `avg` means that global average pooling will be applied to the
- output of the last convolutional layer, and thus the output of
- the model will be a 2D tensor.
- - `max` means that global max pooling will be applied.
- num_classes: int, Optional number of classes to classify images into,
- only to be specified if `include_top` is True, and if no `weights`
- argument is specified, defaults to None.
- classifier_activation: A `str` or callable. The activation function to
- use on the "top" layer. Ignored unless `include_top=True`. Set
- `classifier_activation=None` to return the logits of the "top"
- layer. Defaults to 'softmax'. When loading pretrained weights,
- `classifier_activation` can only be `None` or `"softmax"`.
-
- Returns:
- A `keras.Model` instance.
-""" # noqa: E501
-
-BN_AXIS = 3
-
-
-def correct_pad(inputs, kernel_size):
- """Returns a tuple for zero-padding for 2D convolution with downsampling.
- Args:
- inputs: Input tensor.
- kernel_size: An integer or tuple/list of 2 integers.
- Returns:
- A tuple.
- """
- img_dim = 1
- input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)]
- if isinstance(kernel_size, int):
- kernel_size = (kernel_size, kernel_size)
- if input_size[0] is None:
- adjust = (1, 1)
- else:
- adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
- correct = (kernel_size[0] // 2, kernel_size[1] // 2)
- return (
- (correct[0] - adjust[0], correct[0]),
- (correct[1] - adjust[1], correct[1]),
- )
-
-
-def apply_conv_bn(
- x,
- conv_type,
- filters,
- kernel_size,
- strides=1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation="swish",
- name="",
-):
- """
- Represents Convolutional Block with optional Batch Normalization layer and
- activation layer
-
- Args:
- x: Tensor
- conv_type: str, Type of Conv layer to be used in block.
- - 'normal': The Conv2D layer will be used.
- - 'depth': The DepthWiseConv2D layer will be used.
- filters: int, The filter size of the Conv layer. It should be `None`
- when `conv_type` is set as `depth`
- kernel_size: int (or) tuple, The kernel size of the Conv layer.
- strides: int (or) tuple, The stride value of Conv layer.
- padding: str (or) callable, The type of padding for Conv layer.
- use_bias: bool, Boolean to use bias for Conv layer.
- kernel_initializer: dict (or) str (or) callable, The kernel initializer
- for Conv layer.
- bn_norm: bool, Boolean to add BatchNormalization layer after Conv layer.
- activation: str (or) callable, Activation to be applied on the output at
- the end.
- name: str, name of the block
-
- Returns:
- tf.Tensor
- """
- if conv_type == "normal":
- if filters is None or kernel_size is None:
- raise ValueError(
- "The filter size and kernel size should be set for Conv2D "
- "layer."
- )
- x = layers.Conv2D(
- filters,
- kernel_size,
- strides=strides,
- padding=padding,
- use_bias=use_bias,
- kernel_initializer=kernel_initializer,
- name=name + "_conv",
- )(x)
- elif conv_type == "depth":
- if filters is not None:
- raise ValueError(
- "Filter size shouldn't be set for DepthWiseConv2D layer."
- )
- if kernel_size is None or strides is None:
- raise ValueError(
- "The kernel size and strides should be set for DepthWiseConv2D "
- "layer."
- )
- x = layers.DepthwiseConv2D(
- kernel_size,
- strides=strides,
- padding=padding,
- use_bias=use_bias,
- depthwise_initializer=kernel_initializer,
- name=name + "_dwconv",
- )(x)
- else:
- raise ValueError(
- "The 'conv_type' parameter should be set either to 'normal' or "
- "'depth'"
- )
-
- if bn_norm:
- x = layers.BatchNormalization(axis=BN_AXIS, name=name + "_bn")(x)
- if activation is not None:
- x = layers.Activation(activation, name=name + "_activation")(x)
-
- return x
-
-
-def apply_efficientnet_block(
- inputs,
- filters_in=32,
- filters_out=16,
- kernel_size=3,
- strides=1,
- activation="swish",
- expand_ratio=1,
- se_ratio=0.0,
- id_skip=True,
- drop_rate=0.0,
- name="",
-):
- """An inverted residual block.
-
- Args:
- inputs: Tensor, The input tensor of the block
- filters_in: integer, the number of input filters.
- filters_out: integer, the number of output filters.
- kernel_size: integer, the dimension of the convolution window.
- strides: integer, the stride of the convolution.
- activation: activation function.
- expand_ratio: integer, scaling coefficient for the input filters.
- se_ratio: float between 0 and 1, fraction to squeeze the input filters.
- id_skip: boolean.
- drop_rate: float between 0 and 1, fraction of the input units to drop.
- name: string, block label.
-
- Returns:
- tf.Tensor
- """
- filters = filters_in * expand_ratio
- if expand_ratio != 1:
- x = apply_conv_bn(
- x=inputs,
- conv_type="normal",
- filters=filters,
- kernel_size=1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation=activation,
- name=name + "_expand",
- )
- else:
- x = inputs
-
- # Depthwise Convolution
- if strides == 2:
- x = layers.ZeroPadding2D(
- padding=correct_pad(x, kernel_size),
- name=name + "_dwconv_pad",
- )(x)
- conv_pad = "valid"
- else:
- conv_pad = "same"
-
- x = apply_conv_bn(
- x=x,
- conv_type="depth",
- filters=None,
- kernel_size=kernel_size,
- strides=strides,
- padding=conv_pad,
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation=activation,
- name=name,
- )
-
- # Squeeze and Excitation phase
- if 0 < se_ratio <= 1:
- filters_se = max(1, int(filters_in * se_ratio))
- se = layers.GlobalAveragePooling2D(name=name + "_se_squeeze")(x)
- if BN_AXIS == 1:
- se_shape = (filters, 1, 1)
- else:
- se_shape = (1, 1, filters)
- se = layers.Reshape(se_shape, name=name + "_se_reshape")(se)
- se = layers.Conv2D(
- filters_se,
- 1,
- padding="same",
- activation=activation,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name=name + "_se_reduce",
- )(se)
- se = layers.Conv2D(
- filters,
- 1,
- padding="same",
- activation="sigmoid",
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- name=name + "_se_expand",
- )(se)
- x = layers.multiply([x, se], name=name + "_se_excite")
-
- # Output phase
- x = apply_conv_bn(
- x=x,
- conv_type="normal",
- filters=filters_out,
- kernel_size=1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation=None,
- name=name + "_project",
- )
-
- if id_skip and strides == 1 and filters_in == filters_out:
- if drop_rate > 0:
- x = layers.Dropout(
- drop_rate,
- noise_shape=(None, 1, 1, 1),
- name=name + "_drop",
- )(x)
- x = layers.add([x, inputs], name=name + "_add")
-
- return x
-
-
-@keras.utils.register_keras_serializable(package="keras_cv.models")
-class EfficientNet(keras.Model):
- """This class represents a Keras EfficientNet architecture.
- Args:
- include_rescaling: bool, whether to rescale the inputs. If set to
- True, inputs will be passed through a `Rescaling(1/255.0)` layer.
- include_top: bool, whether to include the fully-connected layer at the
- top of the network.
- width_coefficient: float, scaling coefficient for network width.
- depth_coefficient: float, scaling coefficient for network depth.
- default_size: integer, default input image size.
- dropout_rate: float, dropout rate before final classifier layer.
- drop_connect_rate: float, dropout rate at skip connections.
- depth_divisor: integer, a unit of network width.
- activation: activation function.
- blocks_args: list of dicts, parameters to construct block modules.
- model_name: string, model name.
- weights: one of `None` (random initialization), or the path to the
- weights file to be loaded.
- input_shape: optional shape tuple, it should have exactly 3 input
- channels.
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to
- use as image input for the model.
- pooling: optional pooling mode for feature extraction when `include_top`
- is `False`.
- - `None` means that the output of the model will be the 4D tensor
- output of the last convolutional layer.
- - `avg` means that global average pooling will be applied to the
- output of the last convolutional layer, and thus the output of
- the model will be a 2D tensor.
- - `max` means that global max pooling will be applied.
- num_classes: optional number of classes to classify images into,
- only to be specified if `include_top` is True, and if no `weights`
- argument is specified.
- classifier_activation: A `str` or callable. The activation function to
- use on the "top" layer. Ignored unless `include_top=True`. Set
- `classifier_activation=None` to return the logits of the "top"
- layer.
- Returns:
- A `keras.Model` instance.
- Raises:
- ValueError: in case of invalid argument for `weights`, or invalid input
- shape.
- ValueError: if `classifier_activation` is not `softmax` or `None` when
- using a pretrained top layer.
- """
-
- def __init__(
- self,
- include_rescaling,
- include_top,
- width_coefficient,
- depth_coefficient,
- default_size,
- dropout_rate=0.2,
- drop_connect_rate=0.2,
- depth_divisor=8,
- activation="swish",
- blocks_args="default",
- model_name="efficientnet",
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- num_classes=None,
- classifier_activation="softmax",
- **kwargs,
- ):
- blocks_args_type = blocks_args
-
- if blocks_args == "default":
- blocks_args = DEFAULT_BLOCKS_ARGS
-
- if weights and not tf.io.gfile.exists(weights):
- raise ValueError(
- "The `weights` argument should be either `None` or the path to "
- "the weights file to be loaded. Weights file not found at "
- f"location: {weights}"
- )
-
- if include_top and not num_classes:
- raise ValueError(
- "If `include_top` is True, you should specify `num_classes`. "
- f"Received: num_classes={num_classes}"
- )
-
- if include_top and pooling:
- raise ValueError(
- f"`pooling` must be `None` when `include_top=True`."
- f"Received pooling={pooling} and include_top={include_top}. "
- )
-
- img_input = utils.parse_model_inputs(input_shape, input_tensor)
-
- # Build stem
- x = img_input
-
- if include_rescaling:
- # Use common rescaling strategy across keras_cv
- x = layers.Rescaling(1.0 / 255.0)(x)
-
- x = layers.ZeroPadding2D(
- padding=correct_pad(x, 3), name="stem_conv_pad"
- )(x)
-
- x = apply_conv_bn(
- x=x,
- conv_type="normal",
- filters=EfficientNet.round_filters(
- 32, width_coefficient, depth_divisor
- ),
- kernel_size=3,
- strides=2,
- padding="valid",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation=activation,
- name="stem",
- )
-
- # Build blocks
- blocks_args = copy.deepcopy(blocks_args)
-
- b = 0
- blocks = float(
- sum(
- EfficientNet.round_repeats(args["repeats"], depth_coefficient)
- for args in blocks_args
- )
- )
- for i, args in enumerate(blocks_args):
- assert args["repeats"] > 0
- # Update block input and output filters based on depth multiplier.
- args["filters_in"] = EfficientNet.round_filters(
- args["filters_in"], width_coefficient, depth_divisor
- )
- args["filters_out"] = EfficientNet.round_filters(
- args["filters_out"], width_coefficient, depth_divisor
- )
-
- for j in range(
- EfficientNet.round_repeats(
- args.pop("repeats"), depth_coefficient
- )
- ):
- # The first block needs to take care of stride and filter size
- # increase.
- if j > 0:
- args["strides"] = 1
- args["filters_in"] = args["filters_out"]
- x = apply_efficientnet_block(
- inputs=x,
- activation=activation,
- drop_rate=drop_connect_rate * b / blocks,
- name="block{}{}".format(i + 1, chr(j + 97)),
- **args,
- )
- b += 1
-
- # Build top
- x = apply_conv_bn(
- x=x,
- conv_type="normal",
- filters=self.round_filters(1280, width_coefficient, depth_divisor),
- kernel_size=1,
- padding="same",
- use_bias=False,
- kernel_initializer=CONV_KERNEL_INITIALIZER,
- bn_norm=True,
- activation=activation,
- name="top",
- )
-
- if include_top:
- x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
- if dropout_rate > 0:
- x = layers.Dropout(dropout_rate, name="top_dropout")(x)
- x = layers.Dense(
- num_classes,
- activation=classifier_activation,
- kernel_initializer=DENSE_KERNEL_INITIALIZER,
- name="predictions",
- )(x)
- else:
- if pooling == "avg":
- x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
- elif pooling == "max":
- x = layers.GlobalMaxPooling2D(name="max_pool")(x)
-
- inputs = img_input
-
- # Create model.
- super().__init__(inputs=inputs, outputs=x, name=model_name, **kwargs)
-
- # Load weights.
- if weights is not None:
- self.load_weights(weights)
-
- self.include_rescaling = include_rescaling
- self.include_top = include_top
- self.width_coefficient = width_coefficient
- self.depth_coefficient = depth_coefficient
- self.default_size = default_size
- self.dropout_rate = dropout_rate
- self.drop_connect_rate = drop_connect_rate
- self.depth_divisor = depth_divisor
- self.activation = activation
- self.blocks_args = blocks_args_type
- self.input_tensor = input_tensor
- self.pooling = pooling
- self.num_classes = num_classes
- self.classifier_activation = classifier_activation
-
- @staticmethod
- def round_filters(filters, width_coefficient, divisor):
- """Round number of filters based on depth multiplier.
- Args:
- filters: int, number of filters for Conv layer
- width_coefficient: float, denotes the scaling coefficient of network
- width
- divisor: int, a unit of network width
-
- Returns:
- int, new rounded filters value for Conv layer
- """
- filters *= width_coefficient
- new_filters = max(
- divisor, int(filters + divisor / 2) // divisor * divisor
- )
- # Make sure that round down does not go down by more than 10%.
- if new_filters < 0.9 * filters:
- new_filters += divisor
- return int(new_filters)
-
- @staticmethod
- def round_repeats(repeats, depth_coefficient):
- """Round number of repeats based on depth multiplier.
- Args:
- repeats: int, number of repeats of efficientnet block
- depth_coefficient: float, denotes the scaling coefficient of network
- depth
-
- Returns:
- int, rounded repeats
- """
- return int(math.ceil(depth_coefficient * repeats))
-
- def get_config(self):
- return {
- "include_rescaling": self.include_rescaling,
- "include_top": self.include_top,
- "width_coefficient": self.width_coefficient,
- "depth_coefficient": self.depth_coefficient,
- "default_size": self.default_size,
- "dropout_rate": self.dropout_rate,
- "drop_connect_rate": self.drop_connect_rate,
- "depth_divisor": self.depth_divisor,
- "activation": self.activation,
- "blocks_args": self.blocks_args,
- "input_tensor": self.input_tensor,
- "input_shape": self.input_shape[1:],
- "model_name": self.name,
- "pooling": self.pooling,
- "num_classes": self.num_classes,
- "classifier_activation": self.classifier_activation,
- "trainable": self.trainable,
- }
-
- @classmethod
- def from_config(cls, config):
- return cls(**config)
-
-
-def EfficientNetB0(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb0",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.0,
- depth_coefficient=1.0,
- default_size=224,
- dropout_rate=0.2,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb0"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB1(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb1",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.0,
- depth_coefficient=1.1,
- default_size=240,
- dropout_rate=0.2,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb1"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB2(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb2",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.1,
- depth_coefficient=1.2,
- default_size=260,
- dropout_rate=0.3,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb2"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB3(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb3",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.2,
- depth_coefficient=1.4,
- default_size=300,
- dropout_rate=0.3,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb3"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB4(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb4",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.4,
- depth_coefficient=1.8,
- default_size=380,
- dropout_rate=0.4,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb4"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB5(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb5",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.6,
- depth_coefficient=2.2,
- default_size=456,
- dropout_rate=0.4,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb5"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB6(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb6",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=1.8,
- depth_coefficient=2.6,
- default_size=528,
- dropout_rate=0.5,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb6"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-def EfficientNetB7(
- *,
- include_rescaling,
- include_top,
- num_classes=None,
- weights=None,
- input_shape=(None, None, 3),
- input_tensor=None,
- pooling=None,
- classifier_activation="softmax",
- name="efficientnetb7",
- **kwargs,
-):
- return EfficientNet(
- include_rescaling,
- include_top,
- width_coefficient=2.0,
- depth_coefficient=3.1,
- default_size=600,
- dropout_rate=0.5,
- model_name=name,
- weights=parse_weights(weights, include_top, "efficientnetb7"),
- input_shape=input_shape,
- input_tensor=input_tensor,
- pooling=pooling,
- num_classes=num_classes,
- classifier_activation=classifier_activation,
- **kwargs,
- )
-
-
-EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB0")
-EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB1")
-EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB2")
-EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB3")
-EfficientNetB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB4")
-EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB5")
-EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB6")
-EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB7")
diff --git a/keras_cv/models/legacy/efficientnet_v1_test.py b/keras_cv/models/legacy/efficientnet_v1_test.py
deleted file mode 100644
index 7615075618..0000000000
--- a/keras_cv/models/legacy/efficientnet_v1_test.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2022 The KerasCV Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from absl.testing import parameterized
-
-from keras_cv.models.legacy import efficientnet_v1
-from keras_cv.tests.test_case import TestCase
-
-from .models_test import ModelsTest
-
-MODEL_LIST = [
- (efficientnet_v1.EfficientNetB0, 1280, {}),
-]
-
-"""
-Below are other configurations that we omit from our CI but that can/should
-be tested manually when making changes to this model.
-(efficientnet_v1.EfficientNetB1, 1280, {}),
-(efficientnet_v1.EfficientNetB2, 1408, {}),
-(efficientnet_v1.EfficientNetB3, 1536, {}),
-(efficientnet_v1.EfficientNetB4, 1792, {}),
-(efficientnet_v1.EfficientNetB5, 2048, {}),
-(efficientnet_v1.EfficientNetB6, 2304, {}),
-(efficientnet_v1.EfficientNetB7, 2560, {}),
-"""
-
-
-class EfficientNetV1Test(ModelsTest, TestCase):
- @parameterized.parameters(*MODEL_LIST)
- def test_application_base(self, app, _, args):
- super()._test_application_base(app, _, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_with_rescaling(self, app, last_dim, args):
- super()._test_application_with_rescaling(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_pooling(self, app, last_dim, args):
- super()._test_application_pooling(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_application_variable_input_channels(self, app, last_dim, args):
- super()._test_application_variable_input_channels(app, last_dim, args)
-
- @parameterized.parameters(*MODEL_LIST)
- def test_model_can_be_used_as_backbone(self, app, last_dim, args):
- super()._test_model_can_be_used_as_backbone(app, last_dim, args)
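With the legacy `EfficientNetB0`-`EfficientNetB7` constructors and their tests
deleted above, composing a backbone with a task model is the replacement path.
A hedged migration sketch (the `keras_cv.models.ImageClassifier` wrapper is
assumed here; it is not part of this patch):

    from keras_cv.models import EfficientNetV1B0Backbone, ImageClassifier

    # Previously: efficientnet_v1.EfficientNetB0(
    #     include_rescaling=True, include_top=True, num_classes=10)
    # Now: compose a backbone with a task model.
    model = ImageClassifier(
        backbone=EfficientNetV1B0Backbone(include_rescaling=True),
        num_classes=10,
    )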
From 118f502abd38d5b520c0054972e2a2081915d871 Mon Sep 17 00:00:00 2001
From: Bhavesh Misra
Date: Thu, 17 Aug 2023 22:46:24 +0530
Subject: [PATCH 07/17] Issue_1957 Returning the Matplotlib plt object in the
plot_bounding_box_gallery.py file (#2000)
* Updating_the plot_bounding_box_gallery.py
* Tried_plot_bounding_box_gallery
* Tried_plot_bounding_box_gallery
* Trying_passing_classmapping
* returning_plt_object_done
* returning_plt_object_done
* Done_hopefully
* Done_Hopefully_fnal_2
* linting
* Revert "linting"
This reverts commit 64e7e2ab376b8b3505b9c961bf55259b033174c7.
I made a mistake lol
* Linting
* Linting_Donee
* Conditional_Removed
---
examples/visualization/plot_image_gallery.py | 2 +-
keras_cv/visualization/plot_bounding_box_gallery.py | 2 +-
keras_cv/visualization/plot_image_gallery.py | 8 +++-----
3 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/examples/visualization/plot_image_gallery.py b/examples/visualization/plot_image_gallery.py
index 17197aca4d..be8131643c 100644
--- a/examples/visualization/plot_image_gallery.py
+++ b/examples/visualization/plot_image_gallery.py
@@ -5,7 +5,7 @@
Date created: 2022/10/16
Last modified: 2022/06/24
Description: Visualize ground truth and predicted bounding boxes for a given
- dataset.
+ dataset.
"""
"""
diff --git a/keras_cv/visualization/plot_bounding_box_gallery.py b/keras_cv/visualization/plot_bounding_box_gallery.py
index 1f6bd5cf64..73112458bd 100644
--- a/keras_cv/visualization/plot_bounding_box_gallery.py
+++ b/keras_cv/visualization/plot_bounding_box_gallery.py
@@ -174,7 +174,7 @@ def unpackage_tfds_inputs(inputs):
),
]
- plot_image_gallery(
+ return plot_image_gallery(
plotted_images,
value_range,
legend_handles=legend_handles,
diff --git a/keras_cv/visualization/plot_image_gallery.py b/keras_cv/visualization/plot_image_gallery.py
index 1d98c20f53..05cbbad796 100644
--- a/keras_cv/visualization/plot_image_gallery.py
+++ b/keras_cv/visualization/plot_image_gallery.py
@@ -117,9 +117,6 @@ def plot_image_gallery(
"""
assert_matplotlib_installed("plot_bounding_box_gallery")
- if path is None and show is None:
- # Default to showing the image
- show = True
if path is not None and show:
raise ValueError(
"plot_gallery() expects either `path` to be set, or `show` "
@@ -178,8 +175,9 @@ def plot_image_gallery(
current_axis.margins(x=0, y=0)
current_axis.axis("off")
- if path is None and not show:
- return
+ if path is None and show is None:
+ return fig
+
if path is not None:
plt.savefig(
fname=path,
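With the early `show = True` default removed and the new branch returning
`fig` when neither `path` nor `show` is set, callers can post-process the
figure themselves. A minimal usage sketch, assuming a `(batch, height,
width, 3)` array in the 0-255 range:

    import numpy as np

    from keras_cv.visualization import plot_image_gallery

    images = np.random.uniform(0, 255, size=(9, 64, 64, 3))

    # Neither `path` nor `show` is set, so the Matplotlib figure is returned.
    fig = plot_image_gallery(images, value_range=(0, 255))
    fig.savefig("gallery.png")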
From 38381bad0496129afdf22a6b9f79ce5651b38bc5 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Fri, 18 Aug 2023 02:04:51 +0530
Subject: [PATCH 08/17] [EfficientNetV2 Backbone] Style fix (#2031)
* preset+style fix
* fix
---
.../efficientnet_v2_aliases.py | 99 ++++++++-----------
1 file changed, 43 insertions(+), 56 deletions(-)
diff --git a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py
index 6489d5b65b..f338874982 100644
--- a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py
+++ b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py
@@ -20,12 +20,9 @@
from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501
backbone_presets,
)
-from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501
- backbone_presets_with_weights,
-)
from keras_cv.utils.python_utils import classproperty
-ALIAS_BASE_DOCSTRING = """Instantiates the {name} architecture.
+ALIAS_DOCSTRING = """Instantiates the {name} architecture.
Reference:
- [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298)
@@ -64,8 +61,8 @@ def __new__(
def presets(cls):
"""Dictionary of preset names and configurations."""
return {
- "efficientnetv2_s": copy.deepcopy(
- backbone_presets["efficientnetv2_s"]
+ "efficientnetv2_s_imagenet": copy.deepcopy(
+ backbone_presets["efficientnetv2_s_imagenet"]
),
}
@@ -73,11 +70,7 @@ def presets(cls):
def presets_with_weights(cls):
"""Dictionary of preset names and configurations that include
weights."""
- return {
- "efficientnetv2_s_imagenet": copy.deepcopy(
- backbone_presets_with_weights["efficientnetv2_s_imagenet"]
- ),
- }
+ return cls.presets
@keras_cv_export("keras_cv.models.EfficientNetV2MBackbone")
@@ -102,11 +95,7 @@ def __new__(
@classproperty
def presets(cls):
"""Dictionary of preset names and configurations."""
- return {
- "efficientnetv2_m": copy.deepcopy(
- backbone_presets["efficientnetv2_m"]
- ),
- }
+ return {}
@classproperty
def presets_with_weights(cls):
@@ -137,11 +126,7 @@ def __new__(
@classproperty
def presets(cls):
"""Dictionary of preset names and configurations."""
- return {
- "efficientnetv2_l": copy.deepcopy(
- backbone_presets["efficientnetv2_l"]
- ),
- }
+ return {}
@classproperty
def presets_with_weights(cls):
@@ -173,8 +158,8 @@ def __new__(
def presets(cls):
"""Dictionary of preset names and configurations."""
return {
- "efficientnetv2_b0": copy.deepcopy(
- backbone_presets["efficientnetv2_b0"]
+ "efficientnetv2_b0_imagenet": copy.deepcopy(
+ backbone_presets["efficientnetv2_b0_imagenet"]
),
}
@@ -182,11 +167,7 @@ def presets(cls):
def presets_with_weights(cls):
"""Dictionary of preset names and configurations that include
weights."""
- return {
- "efficientnetv2_b0_imagenet": copy.deepcopy(
- backbone_presets_with_weights["efficientnetv2_b0_imagenet"]
- ),
- }
+ return cls.presets
@keras_cv_export("keras_cv.models.EfficientNetV2B1Backbone")
@@ -212,8 +193,8 @@ def __new__(
def presets(cls):
"""Dictionary of preset names and configurations."""
return {
- "efficientnetv2_b1": copy.deepcopy(
- backbone_presets["efficientnetv2_b1"]
+ "efficientnetv2_b1_imagenet": copy.deepcopy(
+ backbone_presets["efficientnetv2_b1_imagenet"]
),
}
@@ -221,11 +202,7 @@ def presets(cls):
def presets_with_weights(cls):
"""Dictionary of preset names and configurations that include
weights."""
- return {
- "efficientnetv2_b1_imagenet": copy.deepcopy(
- backbone_presets_with_weights["efficientnetv2_b1_imagenet"]
- ),
- }
+ return cls.presets
@keras_cv_export("keras_cv.models.EfficientNetV2B2Backbone")
@@ -251,8 +228,8 @@ def __new__(
def presets(cls):
"""Dictionary of preset names and configurations."""
return {
- "efficientnetv2_b2": copy.deepcopy(
- backbone_presets["efficientnetv2_b2"]
+ "efficientnetv2_b2_imagenet": copy.deepcopy(
+ backbone_presets["efficientnetv2_b2_imagenet"]
),
}
@@ -260,11 +237,7 @@ def presets(cls):
def presets_with_weights(cls):
"""Dictionary of preset names and configurations that include
weights."""
- return {
- "efficientnetv2_b2_imagenet": copy.deepcopy(
- backbone_presets_with_weights["efficientnetv2_b2_imagenet"]
- ),
- }
+ return cls.presets
@keras_cv_export("keras_cv.models.EfficientNetV2B3Backbone")
@@ -298,24 +271,38 @@ def presets_with_weights(cls):
return {}
-EfficientNetV2B0Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2B0"
+setattr(
+ EfficientNetV2SBackbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2S"),
)
-EfficientNetV2B1Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2B1"
+setattr(
+ EfficientNetV2MBackbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2M"),
)
-EfficientNetV2B2Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2B2"
+setattr(
+ EfficientNetV2LBackbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2L"),
)
-EfficientNetV2B3Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2B3"
+setattr(
+ EfficientNetV2B0Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2B0"),
)
-EfficientNetV2SBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2S"
+setattr(
+ EfficientNetV2B1Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2B1"),
)
-EfficientNetV2MBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2M"
+setattr(
+ EfficientNetV2B2Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2B2"),
)
-EfficientNetV2LBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format(
- name="EfficientNetV2L"
+setattr(
+ EfficientNetV2B3Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="EfficientNetV2B3"),
)
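With the aliases now advertising the `*_imagenet` preset keys directly (and
`presets_with_weights` simply mirroring `presets`), loading pretrained weights
is a one-liner. A sketch, assuming the preset downloads ImageNet weights as
its name indicates:

    from keras_cv.models import EfficientNetV2SBackbone

    # The weighted preset key is now listed in the alias's own `presets`.
    backbone = EfficientNetV2SBackbone.from_preset("efficientnetv2_s_imagenet")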
From 5373b916d15544a6763347575d440570ec617495 Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Fri, 18 Aug 2023 13:30:37 -0600
Subject: [PATCH 09/17] Add pre-trained MobileNetV3Small preset (#2034)
---
.../mobilenet_v3_backbone_presets.py | 17 +++++++++++++++++
.../mobilenet_v3_backbone_presets_test.py | 4 ++--
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
index f7cb46171c..8f350fd7b5 100644
--- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
+++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
@@ -200,6 +200,23 @@
"weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_large_imagenet_backbone.h5", # noqa: E501
"weights_hash": "ec55ea2f4f4ee9a2ddf3ee8e2dd784e9d5732690c1fc5afc7e1b2a66703f3337", # noqa: E501
},
+ "mobilenet_v3_small_imagenet": {
+ "metadata": {
+ "description": (
+ "MobileNetV3 model with 28 layers where the batch "
+ "normalization and hard-swish activation are applied after the "
+ "convolution layers. "
+ "Pre-trained on the ImageNet 2012 classification task."
+ ),
+ "params": 2_994_518,
+ "official_name": "MobileNetV3",
+ "path": "mobilenetv3",
+ },
+ "class_name": "keras_cv>MobileNetV3Backbone",
+ "config": backbone_presets_no_weights["mobilenet_v3_small"]["config"],
+ "weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_small_imagenet_backbone.h5", # noqa: E501
+ "weights_hash": "592c2707edfc6c673a3b2d9aaf76dee678557f4a32d573c74f96c8122effa503", # noqa: E501
+ },
}
backbone_presets = {
diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py
index 2d36e60166..aa0c806aab 100644
--- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py
+++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py
@@ -36,7 +36,7 @@ def setUp(self):
self.input_batch = np.ones(shape=(8, 224, 224, 3))
def test_backbone_output(self):
- model = MobileNetV3Backbone.from_preset("mobilenet_v3_large_imagenet")
+ model = MobileNetV3Backbone.from_preset("mobilenet_v3_small_imagenet")
outputs = model(self.input_batch)
# The forward pass from a preset should be stable!
@@ -45,7 +45,7 @@ def test_backbone_output(self):
# We should only update these numbers if we are updating a weights
# file, or have found a discrepancy with the upstream source.
outputs = outputs[0, 0, 0, :5]
- expected = [0.27, 0.01, 0.29, 0.08, -0.12]
+ expected = [0.25, 1.13, -0.26, 0.10, 0.03]
# Keep a high tolerance, so we are robust to different hardware.
self.assertAllClose(
ops.convert_to_numpy(outputs), expected, atol=0.01, rtol=0.01
From 30bbd60ac455d0415814815d4ce4e0cb5cbdee94 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Mon, 21 Aug 2023 22:02:06 +0530
Subject: [PATCH 10/17] alias fix + doc fix in preset (#2035)
---
.../models/backbones/mobilenet_v3/mobilenet_v3_aliases.py | 8 ++++++--
.../mobilenet_v3/mobilenet_v3_backbone_presets.py | 6 +++---
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py
index 9d9c91e8e0..da19b81f28 100644
--- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py
+++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py
@@ -73,12 +73,16 @@ def __new__(
@classproperty
def presets(cls):
"""Dictionary of preset names and configurations."""
- return {}
+ return {
+ "mobilenet_v3_small_imagenet": copy.deepcopy(
+ backbone_presets["mobilenet_v3_small_imagenet"]
+ ),
+ }
@classproperty
def presets_with_weights(cls):
"""Dictionary of preset names and configurations."""
- return {}
+ return cls.presets
@keras_cv_export("keras_cv.models.MobileNetV3LargeBackbone")
diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
index 8f350fd7b5..75ad436fe1 100644
--- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
+++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py
@@ -191,7 +191,7 @@
"convolution layers. "
"Pre-trained on the ImageNet 2012 classification task."
),
- "params": 2_994_518,
+ "params": 2994518,
"official_name": "MobileNetV3",
"path": "mobilenetv3",
},
@@ -203,12 +203,12 @@
"mobilenet_v3_small_imagenet": {
"metadata": {
"description": (
- "MobileNetV3 model with 28 layers where the batch "
+ "MobileNetV3 model with 14 layers where the batch "
"normalization and hard-swish activation are applied after the "
"convolution layers. "
"Pre-trained on the ImageNet 2012 classification task."
),
- "params": 2_994_518,
+ "params": 933502,
"official_name": "MobileNetV3",
"path": "mobilenetv3",
},
From 96ac1ee72ba27ca0e194886d0d9baa3183630200 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Mon, 21 Aug 2023 22:34:56 +0530
Subject: [PATCH 11/17] keras.layers.add->keras.layers.Add (#2033)
---
keras_cv/layers/fusedmbconv.py | 2 +-
keras_cv/layers/mbconv.py | 2 +-
.../backbones/efficientnet_lite/efficientnet_lite_backbone.py | 2 +-
.../backbones/efficientnet_v1/efficientnet_v1_backbone.py | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/keras_cv/layers/fusedmbconv.py b/keras_cv/layers/fusedmbconv.py
index 489c619dbb..2ac33a54df 100644
--- a/keras_cv/layers/fusedmbconv.py
+++ b/keras_cv/layers/fusedmbconv.py
@@ -218,7 +218,7 @@ def call(self, inputs):
if self.strides == 1 and self.input_filters == self.output_filters:
if self.survival_probability:
x = self.dropout(x)
- x = keras.layers.add([x, inputs], name=self.name + "add")
+ x = keras.layers.Add(name=self.name + "add")([x, inputs])
return x
def get_config(self):
diff --git a/keras_cv/layers/mbconv.py b/keras_cv/layers/mbconv.py
index fba5b0194d..34a7e0c8d2 100644
--- a/keras_cv/layers/mbconv.py
+++ b/keras_cv/layers/mbconv.py
@@ -226,7 +226,7 @@ def call(self, inputs):
if self.strides == 1 and self.input_filters == self.output_filters:
if self.survival_probability:
x = self.dropout(x)
- x = keras.layers.add([x, inputs], name=self.name + "add")
+ x = keras.layers.Add(name=self.name + "add")([x, inputs])
return x
def get_config(self):
diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
index d3a6fd8815..fe8eab1a08 100644
--- a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
+++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py
@@ -362,5 +362,5 @@ def apply_efficient_net_lite_block(
x = keras.layers.Dropout(
dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop"
)(x)
- x = keras.layers.add([x, inputs], name=name + "add")
+ x = keras.layers.Add(name=name + "add")([x, inputs])
return x
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
index cc39d3d31c..c90bebf198 100644
--- a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py
@@ -449,6 +449,6 @@ def apply_efficientnet_block(
noise_shape=(None, 1, 1, 1),
name=name + "drop",
)(x)
- x = keras.layers.add([x, inputs], name=name + "add")
+ x = keras.layers.Add(name=name + "add")([x, inputs])
return x
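The swap from the functional helper to the layer class keeps residual
additions explicitly constructed and named. A small sketch of the two
spellings (the suggestion that the layer form is the more portable spelling
under multi-backend Keras is an inference from this patch, not stated in it):

    from keras import layers

    x1 = layers.Input(shape=(8,))
    x2 = layers.Input(shape=(8,))

    # Functional helper: builds an Add layer internally.
    y_helper = layers.add([x1, x2])

    # Layer class: explicit construction with a controllable name,
    # matching the pattern adopted in this patch.
    y_named = layers.Add(name="block1a_add")([x1, x2])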
From f3c629451a59519ef7714d5fb1bf936db68a58a3 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Wed, 23 Aug 2023 01:19:01 +0530
Subject: [PATCH 12/17] Added support of segmentation mask in RandomShear Layer
(#2021)
* seg mask support
* format
* add test
* add demo
* fix
* update readme
* review comment
* fix
---
.../segmentation/random_shear_demo.py | 34 +++++++++++++++++++
keras_cv/layers/preprocessing/README.md | 2 +-
keras_cv/layers/preprocessing/random_shear.py | 27 +++++++++++++++
.../layers/preprocessing/random_shear_test.py | 22 +++++++++++-
4 files changed, 83 insertions(+), 2 deletions(-)
create mode 100644 examples/layers/preprocessing/segmentation/random_shear_demo.py
diff --git a/examples/layers/preprocessing/segmentation/random_shear_demo.py b/examples/layers/preprocessing/segmentation/random_shear_demo.py
new file mode 100644
index 0000000000..1a78a0c8fb
--- /dev/null
+++ b/examples/layers/preprocessing/segmentation/random_shear_demo.py
@@ -0,0 +1,34 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""random_shear_demo.py shows how to use the RandomShear preprocessing layer.
+
+Uses the oxford iiit pet_dataset. In this script the pets
+are loaded, then are passed through the preprocessing layers.
+Finally, they are shown using matplotlib.
+"""
+import demo_utils
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+def main():
+ ds = demo_utils.load_oxford_iiit_pet_dataset()
+ randomshear = preprocessing.RandomShear(0.5, 0.5)
+ ds = ds.map(randomshear, num_parallel_calls=tf.data.AUTOTUNE)
+ demo_utils.visualize_dataset(ds)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md
index b3fef9e96c..4f077d8cec 100644
--- a/keras_cv/layers/preprocessing/README.md
+++ b/keras_cv/layers/preprocessing/README.md
@@ -37,7 +37,7 @@ The provided table gives an overview of the different augmentation layers availa
| RandomRotation | ✅ | ✅ | ✅ | ✅ |
| RandomSaturation | ✅ | ✅ | ✅ | ✅ |
| RandomSharpness | ✅ | ✅ | ✅ | ✅ |
-| RandomShear | ✅ | ❌ | ✅ | ✅ |
+| RandomShear | ✅ | ✅ | ✅ | ✅ |
| RandomTranslation | ✅ | ✅ | ✅ | ✅ |
| RandomZoom | ✅ | ❌ | ❌ | ✅ |
| RepeatedAugmentation + | - | - | - | - |
diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py
index 7c20a9f8d5..dc69288d70 100644
--- a/keras_cv/layers/preprocessing/random_shear.py
+++ b/keras_cv/layers/preprocessing/random_shear.py
@@ -219,6 +219,33 @@ def _build_shear_y_transform_matrix(shear_y):
def augment_labels(self, labels, transformations, **kwargs):
return labels
+ def augment_segmentation_masks(
+ self, segmentation_masks, transformations, **kwargs
+ ):
+ x, y = transformations["shear_x"], transformations["shear_y"]
+
+ if x is not None:
+ transforms_x = self._build_shear_x_transform_matrix(x)
+ segmentation_masks = preprocessing.transform(
+ images=segmentation_masks,
+ transforms=transforms_x,
+ interpolation="nearest",
+ fill_mode=self.fill_mode,
+ fill_value=self.fill_value,
+ )
+
+ if y is not None:
+ transforms_y = self._build_shear_y_transform_matrix(y)
+ segmentation_masks = preprocessing.transform(
+ images=segmentation_masks,
+ transforms=transforms_y,
+ interpolation="nearest",
+ fill_mode=self.fill_mode,
+ fill_value=self.fill_value,
+ )
+
+ return segmentation_masks
+
def augment_bounding_boxes(
self, bounding_boxes, transformations, images=None, **kwargs
):
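Masks follow the same shear transforms as the images but are resampled with
hard-coded "nearest" interpolation so integer class ids survive the warp. A
usage sketch with the dict input format the augmentation base layer accepts:

    import tensorflow as tf

    from keras_cv.layers import preprocessing

    layer = preprocessing.RandomShear(x_factor=0.3, y_factor=0.3)
    outputs = layer(
        {
            "images": tf.random.uniform((2, 64, 64, 3)),
            # Integer-valued masks stay integral under "nearest" resampling.
            "segmentation_masks": tf.ones((2, 64, 64, 1)),
        }
    )
    print(outputs["segmentation_masks"].shape)  # (2, 64, 64, 1)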
diff --git a/keras_cv/layers/preprocessing/random_shear_test.py b/keras_cv/layers/preprocessing/random_shear_test.py
index 180e6a16a7..51933b7f0b 100644
--- a/keras_cv/layers/preprocessing/random_shear_test.py
+++ b/keras_cv/layers/preprocessing/random_shear_test.py
@@ -27,19 +27,33 @@ def test_aggressive_shear_fills_at_least_some_pixels(self):
[2 * tf.ones(img_shape), tf.ones(img_shape)],
axis=0,
)
+ ys_segmentation_masks = tf.stack(
+ [2 * tf.ones(img_shape), tf.ones(img_shape)],
+ axis=0,
+ )
xs = tf.cast(xs, tf.float32)
+ ys_segmentation_masks = tf.cast(ys_segmentation_masks, tf.float32)
fill_value = 0.0
layer = preprocessing.RandomShear(
x_factor=(3, 3), seed=0, fill_mode="constant", fill_value=fill_value
)
xs = layer(xs)
+ ys_segmentation_masks = layer(ys_segmentation_masks)
# Some pixels should be replaced with fill value
self.assertTrue(tf.math.reduce_any(xs[0] == fill_value))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == fill_value))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
+ self.assertTrue(
+ tf.math.reduce_any(ys_segmentation_masks[0] == fill_value)
+ )
+ self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[0] == 2.0))
+ self.assertTrue(
+ tf.math.reduce_any(ys_segmentation_masks[1] == fill_value)
+ )
+ self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[1] == 1.0))
def test_return_shapes(self):
"""test return dict keys and value pairs"""
@@ -55,6 +69,9 @@ def test_return_shapes(self):
"classes": tf.random.uniform((2, 3), 0, 1),
}
+ # randomly sample segmentation masks
+ ys_segmentation_masks = tf.ones((2, 512, 512, 3))
+
layer = preprocessing.RandomShear(
x_factor=(0.1, 0.3),
y_factor=(0.1, 0.3),
@@ -68,18 +85,21 @@ def test_return_shapes(self):
"images": xs,
"targets": ys_labels,
"bounding_boxes": ys_bounding_boxes,
+ "segmentation_masks": ys_segmentation_masks,
}
)
- xs, ys_labels, ys_bounding_boxes = (
+ xs, ys_labels, ys_bounding_boxes, ys_segmentation_masks = (
outputs["images"],
outputs["targets"],
outputs["bounding_boxes"],
+ outputs["segmentation_masks"],
)
ys_bounding_boxes = bounding_box.to_dense(ys_bounding_boxes)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys_labels.shape, [2, 10])
self.assertEqual(ys_bounding_boxes["boxes"].shape, [2, 3, 4])
self.assertEqual(ys_bounding_boxes["classes"].shape, [2, 3])
+ self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3])
def test_single_image_input(self):
"""test for single image input"""
From c740f81b59fbf3830c2f0c0131e84e872b4022f1 Mon Sep 17 00:00:00 2001
From: Aritra Roy Gosthipaty
Date: Wed, 23 Aug 2023 01:46:19 +0530
Subject: [PATCH 13/17] [RandomZoom] Supporting Segmentation Masks (#2010)
* chore: initial commit
* chore: adding initial tests
* update: readme for preprocessing
* fix lint
---
.../segmentation/random_zoom_demo.py | 33 +++++++++++++++++
keras_cv/layers/preprocessing/README.md | 2 +-
keras_cv/layers/preprocessing/random_zoom.py | 25 +++++++++++++
.../layers/preprocessing/random_zoom_test.py | 37 ++++++++++++++-----
4 files changed, 86 insertions(+), 11 deletions(-)
create mode 100644 examples/layers/preprocessing/segmentation/random_zoom_demo.py
diff --git a/examples/layers/preprocessing/segmentation/random_zoom_demo.py b/examples/layers/preprocessing/segmentation/random_zoom_demo.py
new file mode 100644
index 0000000000..17a6f84536
--- /dev/null
+++ b/examples/layers/preprocessing/segmentation/random_zoom_demo.py
@@ -0,0 +1,33 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""random_zoom_demo.py shows how to use the RandomZoom preprocessing layer.
+Uses the oxford iiit pet_dataset. In this script the pets
+are loaded, then are passed through the preprocessing layers.
+Finally, they are shown using matplotlib.
+"""
+import demo_utils
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+def main():
+ ds = demo_utils.load_oxford_iiit_pet_dataset()
+ randomzoom = preprocessing.RandomZoom(0.5, 0.5)
+ ds = ds.map(randomzoom, num_parallel_calls=tf.data.AUTOTUNE)
+ demo_utils.visualize_dataset(ds)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md
index 4f077d8cec..62f0b00f0d 100644
--- a/keras_cv/layers/preprocessing/README.md
+++ b/keras_cv/layers/preprocessing/README.md
@@ -39,7 +39,7 @@ The provided table gives an overview of the different augmentation layers availa
| RandomSharpness | ✅ | ✅ | ✅ | ✅ |
| RandomShear | ✅ | ✅ | ✅ | ✅ |
| RandomTranslation | ✅ | ✅ | ✅ | ✅ |
-| RandomZoom | ✅ | ❌ | ❌ | ✅ |
+| RandomZoom | ✅ | ✅ | ❌ | ✅ |
| RepeatedAugmentation + | - | - | - | - |
| Rescaling | ❌ | ✅ | ✅ | ✅ |
| Resizing | ❌ | ✅ | ✅ | ❌ |
diff --git a/keras_cv/layers/preprocessing/random_zoom.py b/keras_cv/layers/preprocessing/random_zoom.py
index 98b08782bc..2e860da649 100644
--- a/keras_cv/layers/preprocessing/random_zoom.py
+++ b/keras_cv/layers/preprocessing/random_zoom.py
@@ -193,6 +193,31 @@ def augment_images(self, images, transformations, **kwargs):
def augment_labels(self, labels, transformations, **kwargs):
return labels
+ def augment_segmentation_masks(
+ self, segmentation_masks, transformations, **kwargs
+ ):
+ segmentation_masks = preprocessing_utils.ensure_tensor(
+ segmentation_masks, self.compute_dtype
+ )
+ original_shape = segmentation_masks.shape
+ mask_shape = tf.shape(segmentation_masks)
+ mask_hd = tf.cast(mask_shape[H_AXIS], tf.float32)
+ mask_wd = tf.cast(mask_shape[W_AXIS], tf.float32)
+ width_zooms = transformations["width_zooms"]
+ height_zooms = transformations["height_zooms"]
+ zooms = tf.cast(
+ tf.concat([width_zooms, height_zooms], axis=1), dtype=tf.float32
+ )
+ outputs = preprocessing_utils.transform(
+ segmentation_masks,
+ self.get_zoom_matrix(zooms, mask_hd, mask_wd),
+ fill_mode=self.fill_mode,
+ fill_value=self.fill_value,
+ interpolation="nearest",
+ )
+ outputs.set_shape(original_shape)
+ return outputs
+
def get_zoom_matrix(self, zooms, image_height, image_width, name=None):
"""Returns projective transform(s) for the given zoom(s).
diff --git a/keras_cv/layers/preprocessing/random_zoom_test.py b/keras_cv/layers/preprocessing/random_zoom_test.py
index 219bc55779..0fdcf6eec3 100644
--- a/keras_cv/layers/preprocessing/random_zoom_test.py
+++ b/keras_cv/layers/preprocessing/random_zoom_test.py
@@ -35,20 +35,37 @@ def test_output_shapes(self, height_factor, width_factor):
orig_height = 5
orig_width = 8
channels = 3
- input = tf.random.uniform(
- shape=[num_samples, orig_height, orig_width, channels],
- )
+ input = {
+ "images": tf.random.uniform(
+ shape=[num_samples, orig_height, orig_width, channels],
+ ),
+ "segmentation_masks": tf.random.uniform(
+ shape=[num_samples, orig_height, orig_width, 1],
+ minval=0,
+ maxval=2,
+ ),
+ }
layer = RandomZoom(height_factor, width_factor)
actual_output = layer(input)
- expected_output = tf.random.uniform(
- shape=(
- num_samples,
- orig_height,
- orig_width,
- channels,
+ expected_output = {
+ "images": tf.random.uniform(
+ shape=[num_samples, orig_height, orig_width, channels],
),
+ "segmentation_masks": tf.random.uniform(
+ shape=[num_samples, orig_height, orig_width, 1],
+ minval=0,
+ maxval=2,
+ ),
+ }
+ # Check output shape of images
+ self.assertAllEqual(
+ expected_output["images"].shape, actual_output["images"].shape
+ )
+ # Check output shape of segmentation masks
+ self.assertAllEqual(
+ expected_output["segmentation_masks"].shape,
+ actual_output["segmentation_masks"].shape,
)
- self.assertAllEqual(expected_output.shape, actual_output.shape)
def test_random_zoom_in_numeric(self):
for dtype in (np.int64, np.float32):
From d01aee44a73c228c1981ffd913fd7a96ad40b0c0 Mon Sep 17 00:00:00 2001
From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com>
Date: Wed, 23 Aug 2023 02:11:44 +0530
Subject: [PATCH 14/17] Add support of segmentation mask in RandomCutout
(#2004)
* added support of segmentation-mask
* added demo
* add test
* update readme
* random cutout removed from mask
* update test
---
.../segmentation/random_cutout_demo.py | 34 +++++++++++++++++++
keras_cv/layers/preprocessing/README.md | 2 +-
.../layers/preprocessing/random_cutout.py | 5 +++
.../preprocessing/random_cutout_test.py | 6 ++++
4 files changed, 46 insertions(+), 1 deletion(-)
create mode 100644 examples/layers/preprocessing/segmentation/random_cutout_demo.py
diff --git a/examples/layers/preprocessing/segmentation/random_cutout_demo.py b/examples/layers/preprocessing/segmentation/random_cutout_demo.py
new file mode 100644
index 0000000000..71c3631b9b
--- /dev/null
+++ b/examples/layers/preprocessing/segmentation/random_cutout_demo.py
@@ -0,0 +1,34 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""random_cutout_demo.py shows how to use the RandomCutout preprocessing layer.
+
+Uses the oxford iiit pet_dataset. In this script the pets
+are loaded, then are passed through the preprocessing layers.
+Finally, they are shown using matplotlib.
+"""
+import demo_utils
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+def main():
+ ds = demo_utils.load_oxford_iiit_pet_dataset()
+ randomcutout = preprocessing.RandomCutout(0.5, 0.5)
+ ds = ds.map(randomcutout, num_parallel_calls=tf.data.AUTOTUNE)
+ demo_utils.visualize_dataset(ds)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md
index 62f0b00f0d..2e4eedf2fd 100644
--- a/keras_cv/layers/preprocessing/README.md
+++ b/keras_cv/layers/preprocessing/README.md
@@ -29,7 +29,7 @@ The provided table gives an overview of the different augmentation layers availa
| RandomContrast | ✅ | ✅ | ✅ | ✅ |
| RandomCropAndResize | ❌ | ✅ | ✅ | ❌ |
| RandomCrop | ✅ | ❌ | ✅ | ✅ |
-| RandomCutout | ❌ | ❌ | ❌ | ✅ |
+| RandomCutout | ❌ | ✅ | ❌ | ✅ |
| RandomFlip | ✅ | ✅ | ✅ | ✅ |
| RandomGaussianBlur | ❌ | ✅ | ✅ | ✅ |
| RandomHue | ✅ | ✅ | ✅ | ✅ |
diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py
index 24aed8c455..4eb4bb4a24 100644
--- a/keras_cv/layers/preprocessing/random_cutout.py
+++ b/keras_cv/layers/preprocessing/random_cutout.py
@@ -120,6 +120,11 @@ def augment_image(self, image, transformation=None, **kwargs):
def augment_label(self, label, transformation=None, **kwargs):
return label
+ def augment_segmentation_mask(
+ self, segmentation_masks, transformation=None, **kwargs
+ ):
+ return segmentation_masks
+
def _compute_rectangle_position(self, inputs):
input_shape = tf.shape(inputs)
image_height, image_width = (
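For context, a minimal usage sketch (not part of the patch; shapes illustrative) of the intended behavior: cutout rectangles corrupt the image pixels, not the labels, so the masks pass through untouched:

```python
import numpy as np

from keras_cv.layers import preprocessing

layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1)
inputs = {
    "images": np.ones((2, 64, 64, 3), dtype="float32"),
    "segmentation_masks": np.ones((2, 64, 64, 1), dtype="float32"),
}
outputs = layer(inputs)

# Images now contain fill_value rectangles; masks are returned unchanged.
assert np.array_equal(
    np.asarray(outputs["segmentation_masks"]), inputs["segmentation_masks"]
)
```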
diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py
index 818486749b..14930b6fd1 100644
--- a/keras_cv/layers/preprocessing/random_cutout_test.py
+++ b/keras_cv/layers/preprocessing/random_cutout_test.py
@@ -45,23 +45,29 @@ def _run_test(self, height_factor, width_factor):
def test_return_shapes(self):
xs = np.ones((2, 512, 512, 3))
+ ys_segmentation_masks = np.ones((2, 512, 512, 3))
layer = preprocessing.RandomCutout(
height_factor=0.5, width_factor=0.5, seed=1
)
xs = layer(xs)
+ ys_segmentation_masks = layer(ys_segmentation_masks)
self.assertEqual(xs.shape, [2, 512, 512, 3])
+ self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3])
def test_return_shapes_single_element(self):
xs = np.ones((512, 512, 3))
+ ys_segmentation_masks = np.ones((512, 512, 3))
layer = preprocessing.RandomCutout(
height_factor=0.5, width_factor=0.5, seed=1
)
xs = layer(xs)
+ ys_segmentation_masks = layer(ys_segmentation_masks)
self.assertEqual(xs.shape, [512, 512, 3])
+ self.assertEqual(ys_segmentation_masks.shape, [512, 512, 3])
def test_random_cutout_single_float(self):
self._run_test(0.5, 0.5)
From b038f583164fdc950acfffb599fa57f77057cc87 Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Thu, 24 Aug 2023 14:34:20 -0600
Subject: [PATCH 15/17] Remove forward slashes from layer names for backbones
(#2037)
---
.../backbones/densenet/densenet_backbone.py | 6 +++---
.../mobilenet_v3/mobilenet_v3_backbone.py | 16 ++++++++--------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/keras_cv/models/backbones/densenet/densenet_backbone.py b/keras_cv/models/backbones/densenet/densenet_backbone.py
index 98b9eea0e5..28109b64fa 100644
--- a/keras_cv/models/backbones/densenet/densenet_backbone.py
+++ b/keras_cv/models/backbones/densenet/densenet_backbone.py
@@ -88,12 +88,12 @@ def __init__(
x = keras.layers.Rescaling(1 / 255.0)(x)
x = keras.layers.Conv2D(
- 64, 7, strides=2, use_bias=False, padding="same", name="conv1/conv"
+ 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv"
)(x)
x = keras.layers.BatchNormalization(
- axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1/bn"
+ axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1_bn"
)(x)
- x = keras.layers.Activation("relu", name="conv1/relu")(x)
+ x = keras.layers.Activation("relu", name="conv1_relu")(x)
x = keras.layers.MaxPooling2D(
3, strides=2, padding="same", name="pool1"
)(x)
diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py
index f92d177e99..bd033c282f 100644
--- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py
@@ -126,7 +126,7 @@ def __init__(
axis=CHANNEL_AXIS,
epsilon=BN_EPSILON,
momentum=BN_MOMENTUM,
- name="Conv/BatchNorm",
+ name="Conv_BatchNorm",
)(x)
x = apply_hard_swish(x)
@@ -161,7 +161,7 @@ def __init__(
axis=CHANNEL_AXIS,
epsilon=BN_EPSILON,
momentum=BN_MOMENTUM,
- name="Conv_1/BatchNorm",
+ name="Conv_1_BatchNorm",
)(x)
x = apply_hard_swish(x)
@@ -291,11 +291,11 @@ def apply_inverted_res_block(
activation = keras.activations.get(activation)
shortcut = x
- prefix = "expanded_conv/"
+ prefix = "expanded_conv_"
infilters = x.shape[CHANNEL_AXIS]
if expansion_index > 0:
- prefix = f"expanded_conv_{expansion_index}/"
+ prefix = f"expanded_conv_{expansion_index}_"
x = keras.layers.Conv2D(
adjust_channels(infilters * expansion),
@@ -308,14 +308,14 @@ def apply_inverted_res_block(
axis=CHANNEL_AXIS,
epsilon=BN_EPSILON,
momentum=BN_MOMENTUM,
- name=prefix + "expand/BatchNorm",
+ name=prefix + "expand_BatchNorm",
)(x)
x = activation(x)
if stride == 2:
x = keras.layers.ZeroPadding2D(
padding=utils.correct_pad_downsample(x, kernel_size),
- name=prefix + "depthwise/pad",
+ name=prefix + "depthwise_pad",
)(x)
x = keras.layers.DepthwiseConv2D(
@@ -329,7 +329,7 @@ def apply_inverted_res_block(
axis=CHANNEL_AXIS,
epsilon=BN_EPSILON,
momentum=BN_MOMENTUM,
- name=prefix + "depthwise/BatchNorm",
+ name=prefix + "depthwise_BatchNorm",
)(x)
x = activation(x)
@@ -353,7 +353,7 @@ def apply_inverted_res_block(
axis=CHANNEL_AXIS,
epsilon=BN_EPSILON,
momentum=BN_MOMENTUM,
- name=prefix + "project/BatchNorm",
+ name=prefix + "project_BatchNorm",
)(x)
if stride == 1 and infilters == filters:
From ab812d1b4d6414fac0100238c982472dc5d6e8c6 Mon Sep 17 00:00:00 2001
From: David Landup <60978046+DavidLandup0@users.noreply.github.com>
Date: Fri, 25 Aug 2023 00:56:39 +0200
Subject: [PATCH 16/17] [DeepVision Port] SegFormer and Mix-Transformers
(#1946)
* initial dump
* add all basic layers, port roughly to keras core ops
* updated .gitignore
* segformer head and formatting
* cleanup
* remove tf call
* remove tf
* migrating to more keras ops
* cleanups and fixes
* fix reshaping
* comments
* from presets api, keras.ops -> ops
* embed_dims -> embedding_dims
* addressing some PR comments
* docstrings, argument update
* depths arg
* sync
* compute output shapes
* segformer progress
* head
* softmax
* remove softmax
* undo compute_output_shapes()
* efficientmultiheadattention -> segformermultiheadattention
* docstrings
* softmax output
* segformer presets
* updating segformer presets
* segformer presets
* import aliases
* refactoring
* pr comments
* pr comments
* add aliases
* aliases to init
* refactor fix
* import keras_cv_export
* fix presets/aliases and add copyright
* linter warnings
* linter errors
* consistency in presets
* return config
* fix serialization
* Some cleanup + more tests
* Fix DropPath layer (need to update tests + add shim for tf.keras)
* Finish DropPath layer
* Use static shape in backbone
* Formatting
* Switch back to ops.shape
* documentation
* documentation
* remove default num classes
* fix docs
---------
Co-authored-by: ianjjohnson <3072903+ianstenbit@users.noreply.github.com>
---
.gitignore | 1 +
keras_cv/backend/__init__.py | 1 +
keras_cv/backend/random.py | 20 ++
keras_cv/layers/__init__.py | 9 +
.../hierarchical_transformer_encoder.py | 140 ++++++++++
.../layers/overlapping_patching_embedding.py | 85 ++++++
keras_cv/layers/regularization/drop_path.py | 20 +-
.../layers/regularization/drop_path_test.py | 18 +-
.../layers/segformer_multihead_attention.py | 132 +++++++++
keras_cv/models/__init__.py | 28 ++
.../backbones/mix_transformer/__init__.py | 13 +
.../mix_transformer_aliases.py | 262 ++++++++++++++++++
.../mix_transformer_backbone.py | 188 +++++++++++++
.../mix_transformer_backbone_presets.py | 153 ++++++++++
.../mix_transformer_backbone_presets_test.py | 100 +++++++
.../mix_transformer_backbone_test.py | 69 +++++
keras_cv/models/segmentation/__init__.py | 1 +
.../models/segmentation/segformer/__init__.py | 15 +
.../segmentation/segformer/segformer.py | 175 ++++++++++++
.../segformer/segformer_aliases.py | 244 ++++++++++++++++
.../segformer/segformer_presets.py | 105 +++++++
.../segmentation/segformer/segformer_test.py | 92 ++++++
22 files changed, 1855 insertions(+), 16 deletions(-)
create mode 100644 keras_cv/backend/random.py
create mode 100644 keras_cv/layers/hierarchical_transformer_encoder.py
create mode 100644 keras_cv/layers/overlapping_patching_embedding.py
create mode 100644 keras_cv/layers/segformer_multihead_attention.py
create mode 100644 keras_cv/models/backbones/mix_transformer/__init__.py
create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py
create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py
create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py
create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py
create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py
create mode 100644 keras_cv/models/segmentation/segformer/__init__.py
create mode 100644 keras_cv/models/segmentation/segformer/segformer.py
create mode 100644 keras_cv/models/segmentation/segformer/segformer_aliases.py
create mode 100644 keras_cv/models/segmentation/segformer/segformer_presets.py
create mode 100644 keras_cv/models/segmentation/segformer/segformer_test.py
diff --git a/.gitignore b/.gitignore
index 6a59b32803..68d68189bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ __pycache__/
.vscode/
.devcontainer/
.coverage
+.history
diff --git a/keras_cv/backend/__init__.py b/keras_cv/backend/__init__.py
index da703722b9..7440acbd38 100644
--- a/keras_cv/backend/__init__.py
+++ b/keras_cv/backend/__init__.py
@@ -76,6 +76,7 @@
from keras_cv.backend import config # noqa: E402
from keras_cv.backend import ops # noqa: E402
+from keras_cv.backend import random # noqa: E402
from keras_cv.backend import tf_ops # noqa: E402
diff --git a/keras_cv/backend/random.py b/keras_cv/backend/random.py
new file mode 100644
index 0000000000..21d4b08c7d
--- /dev/null
+++ b/keras_cv/backend/random.py
@@ -0,0 +1,20 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from keras_cv.backend.config import multi_backend
+
+if multi_backend():
+ from keras_core.random import * # noqa: F403, F401
+else:
+ from keras_core.src.backend.tensorflow.random import * # noqa: F403, F401
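A quick sketch (illustrative, not from the patch) of what this shim buys downstream layers: a single import path for random ops regardless of the active backend:

```python
from keras_cv.backend import random

# Resolves to keras_core.random under multi-backend Keras, and to the
# keras-core TensorFlow backend implementation otherwise.
noise = random.uniform((2, 1, 1, 1), seed=42)
```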
diff --git a/keras_cv/layers/__init__.py b/keras_cv/layers/__init__.py
index c8b01f2769..342a942f64 100644
--- a/keras_cv/layers/__init__.py
+++ b/keras_cv/layers/__init__.py
@@ -19,6 +19,9 @@
from keras_cv.layers.augmenter import Augmenter
from keras_cv.layers.feature_pyramid import FeaturePyramid
from keras_cv.layers.fusedmbconv import FusedMBConvBlock
+from keras_cv.layers.hierarchical_transformer_encoder import (
+ HierarchicalTransformerEncoder,
+)
from keras_cv.layers.mbconv import MBConvBlock
from keras_cv.layers.object_detection.anchor_generator import AnchorGenerator
from keras_cv.layers.object_detection.box_matcher import BoxMatcher
@@ -32,6 +35,9 @@
CenterNetLabelEncoder,
)
from keras_cv.layers.object_detection_3d.voxelization import DynamicVoxelization
+from keras_cv.layers.overlapping_patching_embedding import (
+ OverlappingPatchingAndEmbedding,
+)
from keras_cv.layers.preprocessing.aug_mix import AugMix
from keras_cv.layers.preprocessing.auto_contrast import AutoContrast
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
@@ -124,6 +130,9 @@
from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D
from keras_cv.layers.regularization.stochastic_depth import StochasticDepth
+from keras_cv.layers.segformer_multihead_attention import (
+ SegFormerMultiheadAttention,
+)
from keras_cv.layers.spatial_pyramid import SpatialPyramidPooling
from keras_cv.layers.transformer_encoder import TransformerEncoder
from keras_cv.layers.vit_layers import PatchingAndEmbedding
diff --git a/keras_cv/layers/hierarchical_transformer_encoder.py b/keras_cv/layers/hierarchical_transformer_encoder.py
new file mode 100644
index 0000000000..ee67a17b56
--- /dev/null
+++ b/keras_cv/layers/hierarchical_transformer_encoder.py
@@ -0,0 +1,140 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+from keras_cv.layers.regularization.drop_path import DropPath
+from keras_cv.layers.segformer_multihead_attention import (
+ SegFormerMultiheadAttention,
+)
+
+
+@keras_cv_export("keras_cv.layers.HierarchicalTransformerEncoder")
+class HierarchicalTransformerEncoder(keras.layers.Layer):
+ """
+ Hierarchical transformer encoder block implementation as a Keras Layer.
+ The layer uses `SegFormerMultiheadAttention` as a `MultiHeadAttention`
+ alternative for computational efficiency, and is meant to be used
+ within the SegFormer architecture.
+
+ References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501
+ - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501
+ - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501
+
+ Args:
+ project_dim: integer, the dimensionality of the projection of the
+ encoder, and output of the `SegFormerMultiheadAttention` layer.
+ Due to the residual addition the input dimensionality has to be
+ equal to the output dimensionality.
+ num_heads: integer, the number of heads for the
+ `SegFormerMultiheadAttention` layer.
+ drop_prob: float, the probability of dropping a random
+ sample using the `DropPath` layer. Defaults to `0.0`.
+ layer_norm_epsilon: float, the epsilon for
+ `LayerNormalization` layers. Defaults to `1e-06`.
+ sr_ratio: integer, the ratio to use within
+ `SegFormerMultiheadAttention`. If set to > 1, a `Conv2D`
+ layer is used to reduce the length of the sequence. Defaults to `1`.
+
+ Basic usage:
+
+ ```
+ project_dim = 1024
+ num_heads = 4
+ patch_size = 16
+
+ encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding(
+ project_dim=project_dim, patch_size=patch_size)(img_batch)
+
+ trans_encoded = keras_cv.layers.HierarchicalTransformerEncoder(project_dim=project_dim,
+ num_heads=num_heads,
+ sr_ratio=1)(encoded_patches)
+
+ print(trans_encoded.shape) # (1, 3136, 1024)
+ ```
+ """
+
+ def __init__(
+ self,
+ project_dim,
+ num_heads,
+ sr_ratio=1,
+ drop_prob=0.0,
+ layer_norm_epsilon=1e-6,
+ **kwargs,
+ ):
+ super().__init__(**kwargs)
+ self.project_dim = project_dim
+ self.num_heads = num_heads
+ self.drop_prop = drop_prob
+
+ self.norm1 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon)
+ self.attn = SegFormerMultiheadAttention(
+ project_dim, num_heads, sr_ratio
+ )
+ self.drop_path = DropPath(drop_prob)
+ self.norm2 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon)
+ self.mlp = self.MixFFN(
+ channels=project_dim,
+ mid_channels=int(project_dim * 4),
+ )
+
+ def build(self, input_shape):
+ super().build(input_shape)
+ self.H = ops.sqrt(ops.cast(input_shape[1], "float32"))
+ self.W = ops.sqrt(ops.cast(input_shape[2], "float32"))
+
+ def call(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "mlp": keras.saving.serialize_keras_object(self.mlp),
+ "project_dim": self.project_dim,
+ "num_heads": self.num_heads,
+ "drop_prop": self.drop_prop,
+ }
+ )
+ return config
+
+ class MixFFN(keras.layers.Layer):
+ def __init__(self, channels, mid_channels):
+ super().__init__()
+ self.fc1 = keras.layers.Dense(mid_channels)
+ self.dwconv = keras.layers.DepthwiseConv2D(
+ kernel_size=3,
+ strides=1,
+ padding="same",
+ )
+ self.fc2 = keras.layers.Dense(channels)
+
+ def call(self, x):
+ x = self.fc1(x)
+ shape = ops.shape(x)
+ H, W = int(math.sqrt(shape[1])), int(math.sqrt(shape[1]))
+ B, C = shape[0], shape[2]
+ x = ops.reshape(x, (B, H, W, C))
+ x = self.dwconv(x)
+ x = ops.reshape(x, (B, -1, C))
+ x = ops.nn.gelu(x)
+ x = self.fc2(x)
+ return x
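A shape-tracing sketch (illustrative numbers, not from the patch) of the MixFFN round trip above. Note the `int(math.sqrt(shape[1]))` reshape is only valid when the sequence length is a perfect square, i.e. the incoming feature map is square:

```python
import math

B, N, C = 1, 3136, 32      # 3136 tokens = a 56x56 grid (224x224 input, stride 4)
H = W = int(math.sqrt(N))  # 56

seq_in = (B, N, C)          # what MixFFN.call receives after fc1
spatial = (B, H, W, C)      # what the DepthwiseConv2D operates on
seq_out = (B, H * W, C)     # flattened back before the gelu / fc2

print(seq_in, spatial, seq_out)
```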
diff --git a/keras_cv/layers/overlapping_patching_embedding.py b/keras_cv/layers/overlapping_patching_embedding.py
new file mode 100644
index 0000000000..69060087ec
--- /dev/null
+++ b/keras_cv/layers/overlapping_patching_embedding.py
@@ -0,0 +1,85 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+
+
+@keras_cv_export("keras_cv.layers.OverlappingPatchingAndEmbedding")
+class OverlappingPatchingAndEmbedding(keras.layers.Layer):
+ def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs):
+ """
+ Overlapping Patching and Embedding layer. Differs from `PatchingAndEmbedding`
+ in that the patch size does not affect the sequence length, which is fully
+ determined by the `stride` parameter. Additionally, no positional embedding is
+ done as part of the layer - only a projection using a `Conv2D` layer.
+
+ References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501
+ - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501
+ - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501
+
+ Args:
+ project_dim: integer, the dimensionality of the projection.
+ Defaults to `32`.
+ patch_size: integer, the size of the patches to encode.
+ Defaults to `7`.
+ stride: integer, the stride to use for the patching before
+ projection. Defaults to `4`.
+
+ Basic usage:
+
+ ```
+ project_dim = 1024
+ patch_size = 16
+
+ encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding(
+ project_dim=project_dim, patch_size=patch_size)(img_batch)
+
+ print(encoded_patches.shape) # (1, 3136, 1024)
+ ```
+ """
+ super().__init__(**kwargs)
+
+ self.project_dim = project_dim
+ self.patch_size = patch_size
+ self.stride = stride
+
+ self.proj = keras.layers.Conv2D(
+ filters=project_dim,
+ kernel_size=patch_size,
+ strides=stride,
+ padding="same",
+ )
+ self.norm = keras.layers.LayerNormalization()
+
+ def call(self, x):
+ x = self.proj(x)
+ # B, H, W, C
+ shape = x.shape
+ x = ops.reshape(x, (-1, shape[1] * shape[2], shape[3]))
+ x = self.norm(x)
+ return x
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "project_dim": self.project_dim,
+ "patch_size": self.patch_size,
+ "stride": self.stride,
+ }
+ )
+ return config
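A back-of-the-envelope check (illustrative numbers) of the docstring's claim that the sequence length is determined by `stride` alone: with `padding="same"`, the projection emits ceil(H/stride) x ceil(W/stride) patches regardless of `patch_size`:

```python
import math

H = W = 224
stride = 4  # default for the first MiT stage

out_h = math.ceil(H / stride)  # 56
out_w = math.ceil(W / stride)  # 56
print(out_h * out_w)           # 3136, matching the "(1, 3136, 1024)" above
```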
diff --git a/keras_cv/layers/regularization/drop_path.py b/keras_cv/layers/regularization/drop_path.py
index e254f29493..4475e2365f 100644
--- a/keras_cv/layers/regularization/drop_path.py
+++ b/keras_cv/layers/regularization/drop_path.py
@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from tensorflow import keras
-
from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+from keras_cv.backend import random
@keras_cv_export("keras_cv.layers.DropPath")
-class DropPath(keras.__internal__.layers.BaseRandomLayer):
+class DropPath(keras.layers.Layer):
"""
Implements the DropPath layer. DropPath randomly drops samples during
training with a probability of `rate`. Note that this layer drops individual
@@ -47,7 +48,7 @@ class DropPath(keras.__internal__.layers.BaseRandomLayer):
""" # noqa: E501
def __init__(self, rate=0.5, seed=None, **kwargs):
- super().__init__(seed=seed, **kwargs)
+ super().__init__(**kwargs)
self.rate = rate
self.seed = seed
@@ -55,12 +56,13 @@ def call(self, x, training=None):
if self.rate == 0.0 or not training:
return x
else:
- keep_prob = 1 - self.rate
- drop_map_shape = (x.shape[0],) + (1,) * (len(x.shape) - 1)
- drop_map = keras.backend.random_bernoulli(
- drop_map_shape, p=keep_prob, seed=self.seed
+ batch_size = x.shape[0] or ops.shape(x)[0]
+ drop_map_shape = (batch_size,) + (1,) * (len(x.shape) - 1)
+ drop_map = ops.cast(
+ random.uniform(drop_map_shape, seed=self.seed) > self.rate,
+ x.dtype,
)
- x = x / keep_prob
+ x = x / (1.0 - self.rate)
x = x * drop_map
return x
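A minimal NumPy sketch (not the layer itself) of the rewritten logic: each sample in the batch is kept with probability `1 - rate`, and survivors are rescaled by `1 / (1 - rate)` so the expected activation is unchanged:

```python
import numpy as np

rng = np.random.default_rng(0)
rate = 0.5
x = np.ones((4, 2, 2, 3), dtype="float32")

# One Bernoulli draw per sample, broadcast over all other dimensions.
drop_map_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
drop_map = (rng.uniform(size=drop_map_shape) > rate).astype(x.dtype)

out = x / (1.0 - rate) * drop_map
print(out[:, 0, 0, 0])  # each sample is either 0.0 or 2.0
```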
diff --git a/keras_cv/layers/regularization/drop_path_test.py b/keras_cv/layers/regularization/drop_path_test.py
index 22f63b5223..00b4b790f0 100644
--- a/keras_cv/layers/regularization/drop_path_test.py
+++ b/keras_cv/layers/regularization/drop_path_test.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import numpy as np
+import pytest
import tensorflow as tf
from keras_cv.layers import DropPath
@@ -23,7 +25,7 @@ class DropPathTest(TestCase):
def test_input_unchanged_in_eval_mode(self):
layer = DropPath(rate=0.5, seed=42)
- inputs = tf.random.uniform(self.FEATURE_SHAPE)
+ inputs = np.random.uniform(size=self.FEATURE_SHAPE)
outputs = layer(inputs, training=False)
@@ -31,7 +33,7 @@ def test_input_unchanged_in_eval_mode(self):
def test_input_unchanged_with_rate_equal_to_zero(self):
layer = DropPath(rate=0, seed=42)
- inputs = tf.random.uniform(self.FEATURE_SHAPE)
+ inputs = np.random.uniform(size=self.FEATURE_SHAPE)
outputs = layer(inputs, training=True)
@@ -39,7 +41,7 @@ def test_input_unchanged_with_rate_equal_to_zero(self):
def test_input_gets_partially_zeroed_out_in_train_mode(self):
layer = DropPath(rate=0.2, seed=42)
- inputs = tf.random.uniform(self.FEATURE_SHAPE)
+ inputs = np.random.uniform(size=self.FEATURE_SHAPE)
outputs = layer(inputs, training=True)
@@ -48,9 +50,11 @@ def test_input_gets_partially_zeroed_out_in_train_mode(self):
self.assertGreaterEqual(non_zeros_inputs, non_zeros_outputs)
+ # Randomness is inconsistent across backends, so this strict check
+ # only runs against the tf.keras backend.
+ @pytest.mark.tf_keras_only
def test_strict_input_gets_partially_zeroed_out_in_train_mode(self):
- layer = DropPath(rate=0.5, seed=42)
- inputs = tf.random.uniform(self.FEATURE_SHAPE)
+ layer = DropPath(rate=0.5, seed=10)
+ inputs = np.random.uniform(size=self.FEATURE_SHAPE)
total_non_zero_inputs = 0
total_non_zero_outputs = 0
@@ -66,6 +70,6 @@ def test_strict_input_gets_partially_zeroed_out_in_train_mode(self):
self.assertAllInRange(
total_non_zero_outputs,
- int(0.49 * tf.cast(total_non_zero_inputs, tf.float32)),
- int(0.51 * tf.cast(total_non_zero_inputs, tf.float32)),
+ int(0.40 * tf.cast(total_non_zero_inputs, tf.float32)),
+ int(0.60 * tf.cast(total_non_zero_inputs, tf.float32)),
)
diff --git a/keras_cv/layers/segformer_multihead_attention.py b/keras_cv/layers/segformer_multihead_attention.py
new file mode 100644
index 0000000000..203773d4ea
--- /dev/null
+++ b/keras_cv/layers/segformer_multihead_attention.py
@@ -0,0 +1,132 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+
+
+@keras_cv_export("keras_cv.layers.SegFormerMultiheadAttention")
+class SegFormerMultiheadAttention(keras.layers.Layer):
+ def __init__(self, project_dim, num_heads, sr_ratio):
+ """
+ Efficient MultiHeadAttention implementation as a Keras layer.
+ A huge bottleneck in scaling transformers is the self-attention layer
+ with an O(n^2) complexity.
+
+ SegFormerMultiheadAttention performs a sequence reduction (SR) operation
+ with a given ratio, shrinking the sequence before the key and value projections,
+ reducing the O(n^2) complexity to O(n^2/R), where R is the sequence reduction
+ ratio (`sr_ratio`^2 for a spatial reduction of `sr_ratio` per side).
+
+ References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501
+ - [NVlabs' official implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501
+ - [@sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py) # noqa: E501
+ - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/efficient_attention.py) # noqa: E501
+
+ Args:
+ project_dim: integer, the dimensionality of the projection
+ of the `SegFormerMultiheadAttention` layer.
+ num_heads: integer, the number of heads to use in the
+ attention computation.
+ sr_ratio: integer, the sequence reduction ratio to perform
+ on the sequence before key and value projections.
+
+ Basic usage:
+
+ ```
+ tensor = tf.random.uniform([1, 196, 32])
+ output = keras_cv.layers.SegFormerMultiheadAttention(project_dim=768,
+ num_heads=2,
+ sr_ratio=4)(tensor)
+ print(output.shape) # (1, 196, 32)
+ ```
+ """
+ super().__init__()
+ self.num_heads = num_heads
+ self.sr_ratio = sr_ratio
+ self.scale = (project_dim // num_heads) ** -0.5
+ self.q = keras.layers.Dense(project_dim)
+ self.k = keras.layers.Dense(project_dim)
+ self.v = keras.layers.Dense(project_dim)
+ self.proj = keras.layers.Dense(project_dim)
+
+ if sr_ratio > 1:
+ self.sr = keras.layers.Conv2D(
+ filters=project_dim,
+ kernel_size=sr_ratio,
+ strides=sr_ratio,
+ padding="same",
+ )
+ self.norm = keras.layers.LayerNormalization()
+
+ def call(self, x):
+ input_shape = ops.shape(x)
+ H, W = int(math.sqrt(input_shape[1])), int(math.sqrt(input_shape[1]))
+ B, C = input_shape[0], input_shape[2]
+
+ q = self.q(x)
+ q = ops.reshape(
+ q,
+ (
+ input_shape[0],
+ input_shape[1],
+ self.num_heads,
+ input_shape[2] // self.num_heads,
+ ),
+ )
+ q = ops.transpose(q, [0, 2, 1, 3])
+
+ if self.sr_ratio > 1:
+ x = ops.reshape(
+ ops.transpose(x, [0, 2, 1]),
+ (B, H, W, C),
+ )
+ x = self.sr(x)
+ x = ops.reshape(x, [input_shape[0], input_shape[2], -1])
+ x = ops.transpose(x, [0, 2, 1])
+ x = self.norm(x)
+
+ k = self.k(x)
+ v = self.v(x)
+
+ k = ops.transpose(
+ ops.reshape(
+ k,
+ [B, -1, self.num_heads, C // self.num_heads],
+ ),
+ [0, 2, 1, 3],
+ )
+
+ v = ops.transpose(
+ ops.reshape(
+ v,
+ [B, -1, self.num_heads, C // self.num_heads],
+ ),
+ [0, 2, 1, 3],
+ )
+
+ attn = (q @ ops.transpose(k, [0, 1, 3, 2])) * self.scale
+ attn = ops.nn.softmax(attn, axis=-1)
+
+ attn = attn @ v
+ attn = ops.reshape(
+ ops.transpose(attn, [0, 2, 1, 3]),
+ [input_shape[0], input_shape[1], input_shape[2]],
+ )
+
+ x = self.proj(attn)
+ return x
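An illustrative complexity sketch (numbers assumed, not from the patch): the SR conv pools the key/value grid from HxW to (H/R)x(W/R), so the attention score matrix shrinks from N x N to N x N/R^2:

```python
N = 3136  # 56 x 56 tokens, e.g. MiT stage 1 at a 224x224 input
for R in (1, 2, 4, 8):
    kv_len = N // (R * R)
    print(f"sr_ratio={R}: scores matrix {N} x {kv_len}")
```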
diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py
index 4191c07575..9c83a3891a 100644
--- a/keras_cv/models/__init__.py
+++ b/keras_cv/models/__init__.py
@@ -112,6 +112,27 @@
from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import (
EfficientNetV2SBackbone,
)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB0Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB1Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB2Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB3Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB4Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB5Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTBackbone,
+)
from keras_cv.models.backbones.mobilenet_v3.mobilenet_v3_aliases import (
MobileNetV3LargeBackbone,
)
@@ -166,5 +187,12 @@
YOLOV8Detector,
)
from keras_cv.models.segmentation import DeepLabV3Plus
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormer
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB0
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB1
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB2
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB3
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB4
+from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB5
from keras_cv.models.stable_diffusion import StableDiffusion
from keras_cv.models.stable_diffusion import StableDiffusionV2
diff --git a/keras_cv/models/backbones/mix_transformer/__init__.py b/keras_cv/models/backbones/mix_transformer/__init__.py
new file mode 100644
index 0000000000..3992ffb59a
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py
new file mode 100644
index 0000000000..7c7ea6a8b6
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py
@@ -0,0 +1,262 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import (
+ MiTBackbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501
+ backbone_presets,
+)
+from keras_cv.utils.python_utils import classproperty
+
+ALIAS_DOCSTRING = """MiT model.
+
+ For transfer learning use cases, make sure to read the
+ [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
+
+ Args:
+ include_rescaling: bool, whether to rescale the inputs. If set to
+ True, inputs will be passed through a `Rescaling(scale=1 / 255)`
+ layer. Defaults to True.
+ input_shape: optional shape tuple, defaults to (224, 224, 3).
+ input_tensor: optional Keras tensor (i.e., output of `layers.Input()`)
+ to use as image input for the model.
+
+ Examples:
+ ```python
+ input_data = tf.ones(shape=(8, 224, 224, 3))
+
+ # Randomly initialized backbone
+ model = {name}Backbone()
+ output = model(input_data)
+ ```
+""" # noqa: E501
+
+
+class MiTB0Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b0", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "mit_b0_imagenet": copy.deepcopy(
+ backbone_presets["mit_b0_imagenet"]
+ ),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class MiTB1Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b1", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+
+class MiTB2Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b2", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+
+class MiTB3Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b3", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+
+class MiTB4Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b4", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+
+class MiTB5Backbone(MiTBackbone):
+ def __new__(
+ cls,
+ include_rescaling=True,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "include_rescaling": include_rescaling,
+ "input_shape": input_shape,
+ "input_tensor": input_tensor,
+ }
+ )
+ return MiTBackbone.from_preset("mit_b5", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations."""
+ return {}
+
+
+setattr(
+ MiTB0Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB0"),
+)
+
+setattr(
+ MiTB1Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB1"),
+)
+
+setattr(
+ MiTB2Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB2"),
+)
+
+setattr(
+ MiTB3Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB3"),
+)
+
+setattr(
+ MiTB4Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB4"),
+)
+
+setattr(
+ MiTB5Backbone,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="MiTB5"),
+)
diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py
new file mode 100644
index 0000000000..bf6a1a6ec2
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py
@@ -0,0 +1,188 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""MiT backbone model.
+
+References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021)
+ - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/models/classification/mix_transformer/mit_tf.py)
+ - [Based on the NVlabs' official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py)
+ - [Inspired by @sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py)
+""" # noqa: E501
+
+import copy
+
+import numpy as np
+
+from keras_cv import layers as cv_layers
+from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+from keras_cv.models import utils
+from keras_cv.models.backbones.backbone import Backbone
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501
+ backbone_presets,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501
+ backbone_presets_with_weights,
+)
+from keras_cv.utils.python_utils import classproperty
+
+
+@keras_cv_export("keras_cv.models.MiTBackbone")
+class MiTBackbone(Backbone):
+ def __init__(
+ self,
+ include_rescaling,
+ depths,
+ input_shape=(224, 224, 3),
+ input_tensor=None,
+ embedding_dims=None,
+ **kwargs,
+ ):
+ """A Keras model implementing the MixTransformer architecture to be
+ used as a backbone for the SegFormer architecture.
+
+ References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501
+ - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/classification/mix_transformer) # noqa: E501
+
+ Args:
+ include_rescaling: bool, whether to rescale the inputs. If set
+ to `True`, inputs will be passed through a `Rescaling(1/255.0)`
+ layer.
+ depths: the number of transformer encoders to be used per stage in the
+ network
+ embedding_dims: the embedding dims per hierarchical stage, used as
+ the levels of the feature pyramid
+ input_shape: optional shape tuple, defaults to (224, 224, 3).
+ input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`)
+ to use as image input for the model.
+
+ Examples:
+
+ Using a preset backbone:
+
+ ```python
+ import numpy as np
+
+ import keras_cv
+ from keras_cv.backend import keras
+
+ images = np.ones(shape=(1, 96, 96, 3))
+ labels = np.zeros(shape=(1, 96, 96, 1))
+ model = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+
+ # Evaluate model
+ model(images)
+
+ # Train model
+ model.compile(
+ optimizer="adam",
+ loss=keras.losses.BinaryCrossentropy(from_logits=False),
+ metrics=["accuracy"],
+ )
+ model.fit(images, labels, epochs=3)
+ ```
+ """
+ drop_path_rate = 0.1
+ dpr = [x for x in np.linspace(0.0, drop_path_rate, sum(depths))]
+ blockwise_num_heads = [1, 2, 5, 8]
+ blockwise_sr_ratios = [8, 4, 2, 1]
+ num_stages = 4
+
+ cur = 0
+ patch_embedding_layers = []
+ transformer_blocks = []
+ layer_norms = []
+
+ for i in range(num_stages):
+ patch_embed_layer = cv_layers.OverlappingPatchingAndEmbedding(
+ project_dim=embedding_dims[0] if i == 0 else embedding_dims[i],
+ patch_size=7 if i == 0 else 3,
+ stride=4 if i == 0 else 2,
+ name=f"patch_and_embed_{i}",
+ )
+ patch_embedding_layers.append(patch_embed_layer)
+
+ transformer_block = [
+ cv_layers.HierarchicalTransformerEncoder(
+ project_dim=embedding_dims[i],
+ num_heads=blockwise_num_heads[i],
+ sr_ratio=blockwise_sr_ratios[i],
+ drop_prob=dpr[cur + k],
+ name=f"hierarchical_encoder_{i}_{k}",
+ )
+ for k in range(depths[i])
+ ]
+ transformer_blocks.append(transformer_block)
+ cur += depths[i]
+ layer_norms.append(keras.layers.LayerNormalization())
+
+ inputs = utils.parse_model_inputs(input_shape, input_tensor)
+ x = inputs
+
+ if include_rescaling:
+ x = keras.layers.Rescaling(scale=1 / 255)(x)
+
+ pyramid_level_inputs = []
+ for i in range(num_stages):
+ # Compute new height/width after the `proj`
+ # call in `OverlappingPatchingAndEmbedding`
+ stride = 4 if i == 0 else 2
+ new_height, new_width = (
+ int(ops.shape(x)[1] / stride),
+ int(ops.shape(x)[2] / stride),
+ )
+
+ x = patch_embedding_layers[i](x)
+ for blk in transformer_blocks[i]:
+ x = blk(x)
+ x = layer_norms[i](x)
+ x = keras.layers.Reshape(
+ (new_height, new_width, -1), name=f"output_level_{i}"
+ )(x)
+ pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+ super().__init__(inputs=inputs, outputs=x, **kwargs)
+
+ self.depths = depths
+ self.embedding_dims = embedding_dims
+ self.include_rescaling = include_rescaling
+ self.input_tensor = input_tensor
+ self.pyramid_level_inputs = {
+ f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs)
+ }
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "depths": self.depths,
+ "embedding_dims": self.embedding_dims,
+ "include_rescaling": self.include_rescaling,
+ "input_shape": self.input_shape[1:],
+ "input_tensor": self.input_tensor,
+ }
+ )
+ return config
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return copy.deepcopy(backbone_presets)
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return copy.deepcopy(backbone_presets_with_weights)
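A usage sketch (hypothetical snippet, assuming the `mit_b0` preset defined in the presets file below) for the `pyramid_level_inputs` bookkeeping above; SegFormer-style heads tap the backbone at every stage rather than only at the final output:

```python
from keras_cv.backend import keras
from keras_cv.models import MiTBackbone

backbone = MiTBackbone.from_preset("mit_b0")
extractor = keras.Model(
    inputs=backbone.inputs,
    outputs=[
        backbone.get_layer(name).output
        for name in backbone.pyramid_level_inputs.values()
    ],
)
# Yields four feature maps at strides 4, 8, 16 and 32 of the input.
```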
diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py
new file mode 100644
index 0000000000..a4c1c2a3e1
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py
@@ -0,0 +1,153 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MiT model preset configurations."""
+
+backbone_presets_no_weights = {
+ "mit_b0": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 8 transformer blocks."
+ ),
+ "params": 3321962,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [32, 64, 160, 256],
+ "depths": [2, 2, 2, 2],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+ "mit_b1": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 8 transformer blocks."
+ ),
+ "params": 13156554,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [64, 128, 320, 512],
+ "depths": [2, 2, 2, 2],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+ "mit_b2": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 16 transformer blocks."
+ ),
+ "params": 24201418,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [64, 128, 320, 512],
+ "depths": [3, 4, 6, 3],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+ "mit_b3": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 28 transformer blocks."
+ ),
+ "params": 44077258,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [64, 128, 320, 512],
+ "depths": [3, 4, 18, 3],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+ "mit_b4": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 41 transformer blocks."
+ ),
+ "params": 60847818,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [64, 128, 320, 512],
+ "depths": [3, 8, 27, 3],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+ "mit_b5": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 52 transformer blocks."
+ ),
+ "params": 81448138,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [64, 128, 320, 512],
+ "depths": [3, 6, 40, 3],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ },
+}
+
+backbone_presets_with_weights = {
+ "mit_b0_imagenet": {
+ "metadata": {
+ "description": (
+ "MiT (MixTransformer) model with 8 transformer blocks. Pre-trained on ImageNet-1K and scores 69% top-1 accuracy on the validation set." # noqa: E501
+ ),
+ "params": 3321962,
+ "official_name": "MiT",
+ "path": "mit",
+ },
+ "class_name": "keras_cv>MiTBackbone",
+ "config": {
+ "embedding_dims": [32, 64, 160, 256],
+ "depths": [2, 2, 2, 2],
+ "include_rescaling": True,
+ "input_shape": (224, 224, 3),
+ "input_tensor": None,
+ },
+ "weights_url": "https://storage.googleapis.com/keras-cv/models/mitb0/imagenet/classification-v0.h5", # noqa: E501
+ "weights_hash": "8e0c416cd330b6fa0bcfb3a5ccc43edcbcabf6a463aee3c2a9b6a1398c207d10", # noqa: E501
+ },
+}
+
+backbone_presets = {
+ **backbone_presets_no_weights,
+ **backbone_presets_with_weights,
+}
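For reference, a sketch (illustrative, not from the patch) of how a preset entry maps onto the constructor: the `"config"` dict holds the `MiTBackbone` kwargs, so these two calls build the same architecture:

```python
from keras_cv.models import MiTBackbone

by_preset = MiTBackbone.from_preset("mit_b0")
by_config = MiTBackbone(
    include_rescaling=True,
    depths=[2, 2, 2, 2],
    embedding_dims=[32, 64, 160, 256],
    input_shape=(224, 224, 3),
)
```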
diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py
new file mode 100644
index 0000000000..0bc443ee92
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py
@@ -0,0 +1,100 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for loading pretrained model presets."""
+
+import numpy as np
+import pytest
+
+from keras_cv.backend import ops
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB0Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import (
+ MiTBackbone,
+)
+from keras_cv.tests.test_case import TestCase
+
+
+@pytest.mark.large
+class MixTransformerPresetSmokeTest(TestCase):
+ """
+ A smoke test for MixTransformer presets we run continuously.
+ This only tests the smallest weights we have available. Run with:
+ `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_large` # noqa: E501
+ """
+
+ def setUp(self):
+ self.input_batch = np.ones(shape=(2, 224, 224, 3))
+
+ def test_backbone_output(self):
+ model = MiTBackbone.from_preset("mit_b0")
+ model(self.input_batch)
+
+ def test_backbone_output_with_weights(self):
+ model = MiTBackbone.from_preset("mit_b0_imagenet")
+
+ # The forward pass from a preset should be stable!
+ # This test should catch cases where we unintentionally change our
+ # network code in a way that would invalidate our preset weights.
+ # We should only update these numbers if we are updating a weights
+ # file, or have found a discrepancy with the upstream source.
+
+ outputs = model(np.ones(shape=(1, 224, 224, 3)))
+ expected = [-0.603472, -0.180627, -1.92137, -0.004339, 2.396384]
+ # Keep a high tolerance, so we are robust to different hardware.
+ self.assertAllClose(
+ ops.convert_to_numpy(outputs[0, 0, 0, :5]),
+ expected,
+ atol=0.01,
+ rtol=0.01,
+ )
+
+ def test_applications_model_output(self):
+ model = MiTB0Backbone()
+ model(self.input_batch)
+
+ def test_applications_model_output_with_preset(self):
+ model = MiTB0Backbone.from_preset("mit_b0_imagenet")
+ model(self.input_batch)
+
+ def test_preset_docstring(self):
+ """Check we did our docstring formatting correctly."""
+ for name in MiTBackbone.presets:
+ self.assertRegex(MiTBackbone.from_preset.__doc__, name)
+
+ def test_unknown_preset_error(self):
+ # Not a preset name
+ with self.assertRaises(ValueError):
+ MiTBackbone.from_preset("mit_b0_clowntown")
+
+ def test_load_weights_error(self):
+ # Try to load weights when none available
+ with self.assertRaises(ValueError):
+ MiTBackbone.from_preset("mit_b0", load_weights=True)
+
+
+@pytest.mark.extra_large
+class MixTransformerPresetFullTest(TestCase):
+ """
+ Test the full enumeration of our preset.
+ This tests every preset for Mix Transformer and is only run manually.
+ Run with:
+ `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_extra_large` # noqa: E501
+ """
+
+ def test_load_mix_transformer(self):
+ input_data = np.ones(shape=(2, 224, 224, 3))
+ for preset in MiTBackbone.presets:
+ model = MiTBackbone.from_preset(preset)
+ model(input_data)
diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py
new file mode 100644
index 0000000000..f24596bdfe
--- /dev/null
+++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py
@@ -0,0 +1,69 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
+ MiTB0Backbone,
+)
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import (
+ MiTBackbone,
+)
+from keras_cv.tests.test_case import TestCase
+
+
+class MixTransformerBackboneTest(TestCase):
+ def setUp(self):
+ self.input_batch = np.ones(shape=(2, 224, 224, 3))
+
+ def test_valid_call(self):
+ model = MiTB0Backbone()
+ model(self.input_batch)
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_model(self):
+ model = MiTB0Backbone(
+ include_rescaling=False,
+ )
+ model_output = model(self.input_batch)
+ save_path = os.path.join(self.get_temp_dir(), "mit_backbone.keras")
+ model.save(save_path)
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+ self.assertIsInstance(restored_model, MiTBackbone)
+
+ # Check that output matches.
+ restored_output = restored_model(self.input_batch)
+ self.assertAllClose(
+ ops.convert_to_numpy(model_output),
+ ops.convert_to_numpy(restored_output),
+ )
+
+ @parameterized.named_parameters(
+ ("one_channel", 1),
+ ("four_channels", 4),
+ )
+ def test_application_variable_input_channels(self, num_channels):
+ model = MiTB0Backbone(
+ input_shape=(224, 224, num_channels),
+ include_rescaling=False,
+ )
+ self.assertEqual(model.output_shape, (None, 7, 7, 256))
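The parameterized test above works because declaring an explicit `input_shape` is all the backbone needs to accept a different channel count; in the MiT design only the patch-embedding stem sees the raw input channels. A usage sketch of the same idea outside the test harness (values illustrative):

```python
import numpy as np

from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import (
    MiTB0Backbone,
)

# Grayscale input: only the first convolution's kernel depth differs from
# the default three-channel configuration.
model = MiTB0Backbone(
    input_shape=(224, 224, 1),
    include_rescaling=False,
)
features = model(np.ones((2, 224, 224, 1)))
print(model.output_shape)  # (None, 7, 7, 256), per the assertion above
```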
diff --git a/keras_cv/models/segmentation/__init__.py b/keras_cv/models/segmentation/__init__.py
index 122dc4191e..f25ee4ea7c 100644
--- a/keras_cv/models/segmentation/__init__.py
+++ b/keras_cv/models/segmentation/__init__.py
@@ -13,3 +13,4 @@
# limitations under the License.
from keras_cv.models.segmentation.deeplab_v3_plus import DeepLabV3Plus
+from keras_cv.models.segmentation.segformer import SegFormer
diff --git a/keras_cv/models/segmentation/segformer/__init__.py b/keras_cv/models/segmentation/segformer/__init__.py
new file mode 100644
index 0000000000..59d29582c2
--- /dev/null
+++ b/keras_cv/models/segmentation/segformer/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from keras_cv.models.segmentation.segformer.segformer import SegFormer
diff --git a/keras_cv/models/segmentation/segformer/segformer.py b/keras_cv/models/segmentation/segformer/segformer.py
new file mode 100644
index 0000000000..0985b13749
--- /dev/null
+++ b/keras_cv/models/segmentation/segformer/segformer.py
@@ -0,0 +1,175 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+from keras_cv.api_export import keras_cv_export
+from keras_cv.backend import keras
+from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501
+ presets,
+)
+from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501
+ presets_with_weights,
+)
+from keras_cv.models.task import Task
+from keras_cv.utils.python_utils import classproperty
+from keras_cv.utils.train import get_feature_extractor
+
+
+@keras_cv_export("keras_cv.models.segmentation.SegFormer")
+class SegFormer(Task):
+ """A Keras model implementing the SegFormer architecture for semantic
+ segmentation.
+
+ References:
+ - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501
+ - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501
+
+ Args:
+        backbone: `keras.Model`. The backbone network for the model, used as
+            a feature extractor for the SegFormer encoder. It is *intended*
+            to be used only with the MiT backbone model, which was created
+            specifically for SegFormer. It should either be a
+            `keras_cv.models.backbones.backbone.Backbone` or a
+            `tf.keras.Model` that implements the `pyramid_level_inputs`
+            property with keys "P2", "P3", "P4", and "P5" and layer names
+            as values.
+        num_classes: int, the number of classes for the segmentation model,
+            including the background class.
+        projection_filters: int, number of filters in the convolution layer
+            projecting the concatenated features into a segmentation map.
+            Defaults to `256`.
+
+ Examples:
+
+ Using the class with a `backbone`:
+
+ ```python
+    import numpy as np
+
+    import keras_cv
+    from keras_cv.backend import keras
+
+ images = np.ones(shape=(1, 96, 96, 3))
+ labels = np.zeros(shape=(1, 96, 96, 1))
+ backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+ model = keras_cv.models.segmentation.SegFormer(
+ num_classes=1, backbone=backbone,
+ )
+
+ # Evaluate model
+ model(images)
+
+ # Train model
+ model.compile(
+ optimizer="adam",
+ loss=keras.losses.BinaryCrossentropy(from_logits=False),
+ metrics=["accuracy"],
+ )
+ model.fit(images, labels, epochs=3)
+ ```
+ """
+
+ def __init__(
+ self,
+ backbone,
+ num_classes,
+ projection_filters=256,
+ **kwargs,
+ ):
+        if not isinstance(backbone, (keras.layers.Layer, keras.Model)):
+            raise ValueError(
+                "Argument `backbone` must be a `keras.layers.Layer` instance "
+                f"or a `keras.Model`. Received instead "
+                f"backbone={backbone} (of type {type(backbone)})."
+            )
+
+ inputs = backbone.input
+
+ feature_extractor = get_feature_extractor(
+ backbone, list(backbone.pyramid_level_inputs.values())
+ )
+        # The feature extractor returns a level->tensor dictionary; keep
+        # its values as an ordered list of feature maps.
+ features = list(feature_extractor(inputs).values())
+
+ # Get H and W of level one output
+ _, H, W, _ = features[0].shape
+ # Project all multi-level outputs onto the same dimensionality
+ # and feature map shape
+ multi_layer_outs = []
+ for feature_dim, feature in zip(backbone.embedding_dims, features):
+ out = keras.layers.Dense(
+ projection_filters, name=f"linear_{feature_dim}"
+ )(feature)
+ out = keras.layers.Resizing(H, W, interpolation="bilinear")(out)
+ multi_layer_outs.append(out)
+
+        # Concatenate the now identically-shaped feature maps,
+        # deepest level first
+ concatenated_outs = keras.layers.Concatenate(axis=3)(
+ multi_layer_outs[::-1]
+ )
+
+ # Fuse concatenated features into a segmentation map
+ seg = keras.Sequential(
+ [
+ keras.layers.Conv2D(
+ filters=projection_filters, kernel_size=1, use_bias=False
+ ),
+ keras.layers.BatchNormalization(),
+ keras.layers.Activation("relu"),
+ ]
+ )(concatenated_outs)
+
+ seg = keras.layers.Dropout(0.1)(seg)
+ seg = keras.layers.Conv2D(
+ filters=num_classes, kernel_size=1, activation="softmax"
+ )(seg)
+
+ output = keras.layers.Resizing(
+ height=inputs.shape[1],
+ width=inputs.shape[2],
+ interpolation="bilinear",
+ )(seg)
+
+ super().__init__(
+ inputs=inputs,
+ outputs=output,
+ **kwargs,
+ )
+
+ self.num_classes = num_classes
+ self.projection_filters = projection_filters
+ self.backbone = backbone
+
+ def get_config(self):
+ config = super().get_config()
+ config.update(
+ {
+ "num_classes": self.num_classes,
+ "projection_filters": self.projection_filters,
+ "backbone": keras.saving.serialize_keras_object(self.backbone),
+ }
+ )
+ return config
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return copy.deepcopy(presets)
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return copy.deepcopy(presets_with_weights)
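Note that `get_config` above serializes the backbone into a nested dict, so restoring a saved `SegFormer` relies on the `Task` base class (or an override) deserializing that entry before calling `__init__`. This patch does not show that code path; a sketch of the kind of `from_config` that makes the round trip work (an assumption about the base class, not part of this diff):

```python
    @classmethod
    def from_config(cls, config):
        # `get_config` stored the backbone in serialized form; rebuild it
        # before forwarding the config to the constructor.
        if isinstance(config.get("backbone"), dict):
            config["backbone"] = keras.saving.deserialize_keras_object(
                config["backbone"]
            )
        return cls(**config)
```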
diff --git a/keras_cv/models/segmentation/segformer/segformer_aliases.py b/keras_cv/models/segmentation/segformer/segformer_aliases.py
new file mode 100644
index 0000000000..03547f60f2
--- /dev/null
+++ b/keras_cv/models/segmentation/segformer/segformer_aliases.py
@@ -0,0 +1,244 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+from keras_cv.models.segmentation.segformer.segformer import SegFormer
+from keras_cv.models.segmentation.segformer.segformer_presets import presets
+from keras_cv.utils.python_utils import classproperty
+
+ALIAS_DOCSTRING = """{name} model.
+
+ For transfer learning use cases, make sure to read the
+ [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
+
+ Args:
+ backbone: a KerasCV backbone for feature extraction.
+ num_classes: the number of classes for segmentation, including the background class.
+
+ Examples:
+ ```python
+ input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Backbone with pretrained ImageNet weights
+    backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+    segformer = keras_cv.models.SegFormer(backbone=backbone, num_classes=19)
+    output = segformer(input_data)
+ ```
+""" # noqa: E501
+
+
+class SegFormerB0(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b0", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b0": copy.deepcopy(presets["segformer_b0"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class SegFormerB1(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b1", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b1": copy.deepcopy(presets["segformer_b1"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class SegFormerB2(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b2", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b2": copy.deepcopy(presets["segformer_b2"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class SegFormerB3(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b3", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b3": copy.deepcopy(presets["segformer_b3"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class SegFormerB4(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b4", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b4": copy.deepcopy(presets["segformer_b4"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+class SegFormerB5(SegFormer):
+ def __new__(
+ cls,
+ num_classes,
+ **kwargs,
+ ):
+ # Pack args in kwargs
+ kwargs.update(
+ {
+ "num_classes": num_classes,
+ }
+ )
+ return SegFormer.from_preset("segformer_b5", **kwargs)
+
+ @classproperty
+ def presets(cls):
+ """Dictionary of preset names and configurations."""
+ return {
+ "segformer_b5": copy.deepcopy(presets["segformer_b5"]),
+ }
+
+ @classproperty
+ def presets_with_weights(cls):
+ """Dictionary of preset names and configurations that include
+ weights."""
+ return cls.presets
+
+
+setattr(
+ SegFormerB0,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB0"),
+)
+
+setattr(
+ SegFormerB1,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB1"),
+)
+
+setattr(
+ SegFormerB2,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB2"),
+)
+
+setattr(
+ SegFormerB3,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB3"),
+)
+
+setattr(
+ SegFormerB4,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB4"),
+)
+
+setattr(
+ SegFormerB5,
+ "__doc__",
+ ALIAS_DOCSTRING.format(name="SegFormerB5"),
+)
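The six alias classes above differ only in the preset name they pass to `from_preset`. Spelling them out keeps each class greppable and documentable, but the same shape could be generated in a loop; a sketch of that alternative (illustrative only, not what the patch ships):

```python
from keras_cv.models.segmentation.segformer.segformer import SegFormer


def _make_alias(class_name, preset_name):
    """Build a SegFormer alias class bound to a single preset."""

    def __new__(cls, num_classes, **kwargs):
        kwargs.update({"num_classes": num_classes})
        return SegFormer.from_preset(preset_name, **kwargs)

    return type(class_name, (SegFormer,), {"__new__": __new__})


# e.g. SegFormerB0 = _make_alias("SegFormerB0", "segformer_b0"), and so on
# through B5. The per-class `presets` classproperties would still need to
# be attached, which is one reason the explicit classes are easier to read.
```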
diff --git a/keras_cv/models/segmentation/segformer/segformer_presets.py b/keras_cv/models/segmentation/segformer/segformer_presets.py
new file mode 100644
index 0000000000..e19e2ec9ba
--- /dev/null
+++ b/keras_cv/models/segmentation/segformer/segformer_presets.py
@@ -0,0 +1,105 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""SegFormer model preset configurations."""
+
+from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501
+ backbone_presets,
+)
+
+presets_no_weights = {
+ "segformer_b0": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB0 backbone."),
+ "params": 3719027,
+ "official_name": "SegFormerB0",
+ "path": "segformer_b0",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {
+ "backbone": backbone_presets["mit_b0"],
+ },
+ },
+ "segformer_b1": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB1 backbone."),
+ "params": 13682643,
+ "official_name": "SegFormerB1",
+ "path": "segformer_b1",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {"backbone": backbone_presets["mit_b1"]},
+ },
+ "segformer_b2": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB2 backbone."),
+ "params": 24727507,
+ "official_name": "SegFormerB2",
+ "path": "segformer_b2",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {"backbone": backbone_presets["mit_b2"]},
+ },
+ "segformer_b3": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB3 backbone."),
+ "params": 44603347,
+ "official_name": "SegFormerB3",
+ "path": "segformer_b3",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {"backbone": backbone_presets["mit_b3"]},
+ },
+ "segformer_b4": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB4 backbone."),
+ "params": 61373907,
+ "official_name": "SegFormerB4",
+ "path": "segformer_b4",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {"backbone": backbone_presets["mit_b4"]},
+ },
+ "segformer_b5": {
+ "metadata": {
+ "description": ("SegFormer model with MiTB5 backbone."),
+ "params": 81974227,
+ "official_name": "SegFormerB5",
+ "path": "segformer_b5",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {"backbone": backbone_presets["mit_b5"]},
+ },
+}
+
+presets_with_weights = {
+ "segformer_b0_imagenet": {
+ "metadata": {
+ "description": (
+ "SegFormer model with a pretrained MiTB0 backbone."
+ ),
+ "params": 3719027,
+ "official_name": "SegFormerB0",
+ "path": "segformer_b0",
+ },
+ "class_name": "keras_cv>SegFormer",
+ "config": {
+ "backbone": backbone_presets["mit_b0_imagenet"],
+ },
+ },
+}
+
+presets = {
+ **presets_no_weights,
+ **presets_with_weights,
+}
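With these configurations registered, model construction reduces to `from_preset`; a short usage sketch, assuming `Task.from_preset` forwards extra constructor kwargs such as `num_classes` (preset names taken from this file, the class count is arbitrary):

```python
from keras_cv.models import SegFormer

# Architecture-only preset: MiTB0 backbone with random weights.
model = SegFormer.from_preset("segformer_b0", num_classes=19)

# Same architecture, but the backbone carries pretrained ImageNet weights.
pretrained = SegFormer.from_preset("segformer_b0_imagenet", num_classes=19)
```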
diff --git a/keras_cv/models/segmentation/segformer/segformer_test.py b/keras_cv/models/segmentation/segformer/segformer_test.py
new file mode 100644
index 0000000000..0990e0e88f
--- /dev/null
+++ b/keras_cv/models/segmentation/segformer/segformer_test.py
@@ -0,0 +1,92 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import numpy as np
+import pytest
+import tensorflow as tf
+
+from keras_cv.backend import keras
+from keras_cv.backend import ops
+from keras_cv.models import MiTBackbone
+from keras_cv.models import SegFormer
+from keras_cv.tests.test_case import TestCase
+
+
+class SegFormerTest(TestCase):
+ def test_segformer_construction(self):
+ backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3])
+ model = SegFormer(backbone=backbone, num_classes=1)
+ model.compile(
+ optimizer="adam",
+ loss=keras.losses.BinaryCrossentropy(),
+ metrics=["accuracy"],
+ )
+
+ @pytest.mark.large
+ def test_segformer_call(self):
+ backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3])
+ model = SegFormer(backbone=backbone, num_classes=1)
+ images = np.random.uniform(size=(2, 512, 512, 3))
+ _ = model(images)
+ _ = model.predict(images)
+
+ @pytest.mark.large
+ def test_weights_change(self):
+ target_size = [512, 512, 2]
+
+ images = tf.ones(shape=[1] + [512, 512, 3])
+ labels = tf.zeros(shape=[1] + target_size)
+ ds = tf.data.Dataset.from_tensor_slices((images, labels))
+ ds = ds.repeat(2)
+ ds = ds.batch(2)
+
+ backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3])
+ model = SegFormer(backbone=backbone, num_classes=2)
+
+ model.compile(
+ optimizer="adam",
+ loss=keras.losses.BinaryCrossentropy(),
+ metrics=["accuracy"],
+ )
+
+ original_weights = model.get_weights()
+ model.fit(ds, epochs=1)
+ updated_weights = model.get_weights()
+
+ for w1, w2 in zip(original_weights, updated_weights):
+ self.assertNotAllEqual(w1, w2)
+ self.assertFalse(ops.any(ops.isnan(w2)))
+
+ @pytest.mark.large # Saving is slow, so mark these large.
+ def test_saved_model(self):
+ target_size = [512, 512, 3]
+
+ backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3])
+ model = SegFormer(backbone=backbone, num_classes=1)
+
+ input_batch = np.ones(shape=[2] + target_size)
+ model_output = model(input_batch)
+
+ save_path = os.path.join(self.get_temp_dir(), "model.keras")
+ model.save(save_path, save_format="keras_v3")
+ restored_model = keras.models.load_model(save_path)
+
+ # Check we got the real object back.
+ self.assertIsInstance(restored_model, SegFormer)
+
+ # Check that output matches.
+ restored_output = restored_model(input_batch)
+ self.assertAllClose(model_output, restored_output)
From acd2681cae2669065207de2623527729a0db0b58 Mon Sep 17 00:00:00 2001
From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com>
Date: Fri, 25 Aug 2023 18:33:08 -0600
Subject: [PATCH 17/17] Fix test errors that were introduced by upgrading to
Keras Core 0.1.5 (#2041)
* Fix build process for spatial pyramid pooling
* Fix label encoder for YOLOV8 for 0.1.5
---
keras_cv/layers/spatial_pyramid.py | 10 +++++++++-
.../object_detection/yolo_v8/yolo_v8_label_encoder.py | 2 +-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/keras_cv/layers/spatial_pyramid.py b/keras_cv/layers/spatial_pyramid.py
index 9c9a6849df..b45ee7bda3 100644
--- a/keras_cv/layers/spatial_pyramid.py
+++ b/keras_cv/layers/spatial_pyramid.py
@@ -91,6 +91,7 @@ def build(self, input_shape):
keras.layers.Activation(self.activation),
]
)
+ conv_sequential.build(input_shape)
self.aspp_parallel_channels.append(conv_sequential)
# Channel 2 and afterwards are based on self.dilation_rates, and each of
@@ -109,6 +110,7 @@ def build(self, input_shape):
keras.layers.Activation(self.activation),
]
)
+ conv_sequential.build(input_shape)
self.aspp_parallel_channels.append(conv_sequential)
# Last channel is the global average pooling with conv2D 1x1 kernel.
@@ -125,10 +127,11 @@ def build(self, input_shape):
keras.layers.Activation(self.activation),
]
)
+ pool_sequential.build(input_shape)
self.aspp_parallel_channels.append(pool_sequential)
# Final projection layers
- self.projection = keras.Sequential(
+ projection = keras.Sequential(
[
keras.layers.Conv2D(
filters=self.num_channels,
@@ -140,6 +143,11 @@ def build(self, input_shape):
keras.layers.Dropout(rate=self.dropout),
],
)
+ projection_input_channels = (
+ 2 + len(self.dilation_rates)
+ ) * self.num_channels
+ projection.build(tuple(input_shape[:-1]) + (projection_input_channels,))
+ self.projection = projection
def call(self, inputs, training=None):
"""Calls the Atrous Spatial Pyramid Pooling layer on an input.
diff --git a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py
index 48e09740f2..9595cd2ee3 100644
--- a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py
+++ b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py
@@ -225,7 +225,7 @@ def encode_to_targets(
# return zeros if no gt boxes are present
return ops.cond(
- max_num_boxes > 0,
+ ops.array(max_num_boxes > 0),
lambda: encode_to_targets(
pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt
),