From 2a704e1a69cc46c808209f45ce19f311b05fe19d Mon Sep 17 00:00:00 2001 From: IMvision12 <88665786+IMvision12@users.noreply.github.com> Date: Tue, 15 Aug 2023 04:55:44 +0530 Subject: [PATCH 01/17] [RandomTranslation] Supporting Segmentation Masks (#2024) * Added support for segmentation * Format * Update ReadMe * Demo * interpolation * Format * Format --- .../segmentation/random_translation_demo.py | 33 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- .../preprocessing/random_translation.py | 28 ++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 examples/layers/preprocessing/segmentation/random_translation_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_translation_demo.py b/examples/layers/preprocessing/segmentation/random_translation_demo.py new file mode 100644 index 0000000000..72abb9bcbb --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_translation_demo.py @@ -0,0 +1,33 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_translation_demo.py shows how to use the RandomTranslation +preprocessing layer. Uses the oxford iiit pet_dataset. In this +script the pets are loaded, then are passed through the +preprocessing layers. Finally, they are shown using matplotlib. 
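+The segmentation masks from the dataset are translated together with the
+images, since RandomTranslation now supports segmentation masks.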
+""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomcutout = preprocessing.RandomTranslation(0.5, 0.5) + ds = ds.map(randomcutout, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index fcfacd8cd3..b3fef9e96c 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -38,7 +38,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomSaturation | ✅ | ✅ | ✅ | ✅ | | RandomSharpness | ✅ | ✅ | ✅ | ✅ | | RandomShear | ✅ | ❌ | ✅ | ✅ | -| RandomTranslation | ✅ | ❌ | ✅ | ✅ | +| RandomTranslation | ✅ | ✅ | ✅ | ✅ | | RandomZoom | ✅ | ❌ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | | Rescaling | ❌ | ✅ | ✅ | ✅ | diff --git a/keras_cv/layers/preprocessing/random_translation.py b/keras_cv/layers/preprocessing/random_translation.py index 8b3a99a684..3fcb6d7daf 100644 --- a/keras_cv/layers/preprocessing/random_translation.py +++ b/keras_cv/layers/preprocessing/random_translation.py @@ -201,6 +201,34 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + segmentation_masks = preprocessing_utils.ensure_tensor( + segmentation_masks, self.compute_dtype + ) + original_shape = segmentation_masks.shape + mask_shape = tf.shape(segmentation_masks) + img_hd = tf.cast(mask_shape[H_AXIS], tf.float32) + img_wd = tf.cast(mask_shape[W_AXIS], tf.float32) + height_translations = transformations["height_translations"] + width_translations = transformations["width_translations"] + height_translations = height_translations * img_hd + width_translations = width_translations * img_wd + translations = tf.cast( + tf.concat([width_translations, height_translations], axis=1), + dtype=tf.float32, + ) + output = preprocessing_utils.transform( + segmentation_masks, + preprocessing_utils.get_translation_matrix(translations), + interpolation="nearest", + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + output.set_shape(original_shape) + return output + def augment_bounding_boxes( self, bounding_boxes, transformations, images=None, **kwargs ): From d611c83e73e86a829893eb1d64eb207ecfb4a37b Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:48:23 -0600 Subject: [PATCH 02/17] Restructure retinanet steps to avoid modifying args (#2029) --- .../object_detection/retinanet/retinanet.py | 59 +++++-------------- 1 file changed, 14 insertions(+), 45 deletions(-) diff --git a/keras_cv/models/object_detection/retinanet/retinanet.py b/keras_cv/models/object_detection/retinanet/retinanet.py index 4c9fc5909d..3dd42c185c 100644 --- a/keras_cv/models/object_detection/retinanet/retinanet.py +++ b/keras_cv/models/object_detection/retinanet/retinanet.py @@ -396,10 +396,18 @@ def compile( super().compile(loss=losses, **kwargs) def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): + y_for_label_encoder = bounding_box.convert_format( + y, + source=self.bounding_box_format, + target=self.label_encoder.bounding_box_format, + images=x, + ) + + boxes, classes = self.label_encoder(x, y_for_label_encoder) + box_pred = y_pred["box"] cls_pred = 
y_pred["classification"] - boxes = y["box"] - classes = y["classification"] + if boxes.shape[-1] != 4: raise ValueError( "boxes should have shape (None, None, 4). Got " @@ -453,50 +461,15 @@ def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): def train_step(self, *args): data = args[-1] + args = args[:-1] x, y = unpack_input(data) - - y_for_label_encoder = bounding_box.convert_format( - y, - source=self.bounding_box_format, - target=self.label_encoder.bounding_box_format, - images=x, - ) - - boxes, classes = self.label_encoder(x, y_for_label_encoder) - super_args = args[:-1] + ( - ( - x, - {"box": boxes, "classification": classes, "unencoded": y}, - ), - ) - - return super().train_step(*super_args) + return super().train_step(*args, (x, y)) def test_step(self, *args): data = args[-1] + args = args[:-1] x, y = unpack_input(data) - y_for_label_encoder = bounding_box.convert_format( - y, - source=self.bounding_box_format, - target=self.label_encoder.bounding_box_format, - images=x, - ) - boxes, classes = self.label_encoder(x, y_for_label_encoder) - boxes = bounding_box.convert_format( - boxes, - source=self.label_encoder.bounding_box_format, - target=self.bounding_box_format, - images=x, - ) - - super_args = args[:-1] + ( - ( - x, - {"box": boxes, "classification": classes, "unencoded": y}, - ), - ) - - return super().test_step(*super_args) + return super().test_step(*args, (x, y)) def compute_metrics(self, x, y, y_pred, sample_weight): metrics = {} @@ -505,10 +478,6 @@ def compute_metrics(self, x, y, y_pred, sample_weight): if not self._has_user_metrics: return metrics - # For computing non-loss metrics, we don't care about the encoded - # boxes and classes, just the raw input boxes. - y = y["unencoded"] - y_pred = self.decode_predictions(y_pred, x) for metric in self._user_metrics: From db57e1d0dab58e97c7cedfe1db2fe6a3dd7750db Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:17:19 -0600 Subject: [PATCH 03/17] Use non-ragged outputs in MultiClassNMS (#2030) * Use non-ragged outputs in MultiClassNMS Using Ragged outputs that weren't subsequently padded was causing issues in the PyCOCOCallback, and we shouldn't silently default to Ragged anywhere. * Update multi_class_non_max_suppression_test.py --- .../object_detection/multi_class_non_max_suppression.py | 2 +- .../multi_class_non_max_suppression_test.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py index 3d34eafbf2..7825268578 100644 --- a/keras_cv/layers/object_detection/multi_class_non_max_suppression.py +++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression.py @@ -123,7 +123,7 @@ def call( } # this is required to comply with KerasCV bounding box format. 
return bounding_box.mask_invalid_detections( - bounding_boxes, output_ragged=True + bounding_boxes, output_ragged=False ) def get_config(self): diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py index f019e182d7..19eef623ff 100644 --- a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py +++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py @@ -47,6 +47,6 @@ def decode_predictions_output_shapes(): class NmsPredictionDecoderTest(TestCase): def test_decode_predictions_output_shapes(self): result = decode_predictions_output_shapes() - self.assertEqual(result["boxes"].shape, [8, None, 4]) - self.assertEqual(result["classes"].shape, [8, None]) - self.assertEqual(result["confidence"].shape, [8, None]) + self.assertEqual(result["boxes"].shape, [8, 100, 4]) + self.assertEqual(result["classes"].shape, [8, 100]) + self.assertEqual(result["confidence"].shape, [8, 100]) From 39278475b0751ce94d0f4c1c2673040b66ba17a1 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:42:11 -0600 Subject: [PATCH 04/17] Use correct convention for static shape in point_cloud grouping (#2028) * Use correct convention for static shape in point_cloud grouping * Use len(shape) for rank --- keras_cv/point_cloud/point_cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_cv/point_cloud/point_cloud.py b/keras_cv/point_cloud/point_cloud.py index 5e27e8b62c..ad345f915d 100644 --- a/keras_cv/point_cloud/point_cloud.py +++ b/keras_cv/point_cloud/point_cloud.py @@ -72,10 +72,10 @@ def group_points_by_boxes(points, boxes): box, all the point indices that belong to the box. 
""" - num_boxes = boxes.get_shape().as_list()[-2] or tf.shape(boxes)[-2] + num_boxes = boxes.shape[-2] or tf.shape(boxes)[-2] # [..., num_points] box_indices = within_box3d_index(points, boxes) - num_points = points.get_shape().as_list()[-2] or tf.shape(points)[-2] + num_points = points.shape[-2] or tf.shape(points)[-2] point_indices = tf.range(num_points, dtype=tf.int32) def group_per_sample(box_index): @@ -87,7 +87,7 @@ def group_per_sample(box_index): ) return res - boxes_rank = boxes.shape.rank + boxes_rank = len(boxes.shape) if boxes_rank == 2: return group_per_sample(box_indices) elif boxes_rank == 3: From 1eabccb30908a1a10143c0c46204443d7c7825d5 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Thu, 17 Aug 2023 07:46:23 +0530 Subject: [PATCH 05/17] Migrate Efficientnetlite to Backbone (#1877) * initial commit with moved files+added files * removed unwanted * added presets * backbone file changes * aliases file added * backbone fix * inits * format and lint * format and lint * add test+format and lint * update params * reviewed comments * fix * fix * updated test backbone * review changes from #1716 * fix * port * port:typo+fix * port: fix test error * port: update * fix backbone * fix preset in aliases * nits --- keras_cv/models/__init__.py | 18 + keras_cv/models/backbones/backbone_presets.py | 5 + .../backbones/efficientnet_lite/__init__.py | 13 + .../efficientnet_lite_aliases.py | 228 ++++++ .../efficientnet_lite_backbone.py | 366 ++++++++++ .../efficientnet_lite_backbone_presets.py | 175 +++++ ...efficientnet_lite_backbone_presets_test.py | 60 ++ .../efficientnet_lite_backbone_test.py | 162 +++++ keras_cv/models/legacy/__init__.py | 5 - keras_cv/models/legacy/efficientnet_lite.py | 678 ------------------ .../models/legacy/efficientnet_lite_test.py | 55 -- 11 files changed, 1027 insertions(+), 738 deletions(-) create mode 100644 keras_cv/models/backbones/efficientnet_lite/__init__.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py delete mode 100644 keras_cv/models/legacy/efficientnet_lite.py delete mode 100644 keras_cv/models/legacy/efficientnet_lite_test.py diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 3e5847a346..1861b49c03 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -43,6 +43,24 @@ from keras_cv.models.backbones.densenet.densenet_backbone import ( DenseNetBackbone, ) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB1Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB2Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB3Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB4Backbone, +) +from 
keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2B0Backbone, ) diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py index 1535d91374..3852b28877 100644 --- a/keras_cv/models/backbones/backbone_presets.py +++ b/keras_cv/models/backbones/backbone_presets.py @@ -16,6 +16,9 @@ from keras_cv.models.backbones.csp_darknet import csp_darknet_backbone_presets from keras_cv.models.backbones.densenet import densenet_backbone_presets +from keras_cv.models.backbones.efficientnet_lite import ( + efficientnet_lite_backbone_presets, +) from keras_cv.models.backbones.efficientnet_v2 import ( efficientnet_v2_backbone_presets, ) @@ -31,6 +34,7 @@ **csp_darknet_backbone_presets.backbone_presets_no_weights, **efficientnet_v2_backbone_presets.backbone_presets_no_weights, **densenet_backbone_presets.backbone_presets_no_weights, + **efficientnet_lite_backbone_presets.backbone_presets_no_weights, **yolo_v8_backbone_presets.backbone_presets_no_weights, } @@ -41,6 +45,7 @@ **csp_darknet_backbone_presets.backbone_presets_with_weights, **efficientnet_v2_backbone_presets.backbone_presets_with_weights, **densenet_backbone_presets.backbone_presets_with_weights, + **efficientnet_lite_backbone_presets.backbone_presets_with_weights, **yolo_v8_backbone_presets.backbone_presets_with_weights, } diff --git a/keras_cv/models/backbones/efficientnet_lite/__init__.py b/keras_cv/models/backbones/efficientnet_lite/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py new file mode 100644 index 0000000000..1a8fe92404 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_aliases.py @@ -0,0 +1,228 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """Instantiates the {name} architecture. 
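+    This alias simply forwards its arguments to
+    `EfficientNetLiteBackbone.from_preset` for the matching preset.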
+ + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + Usage: + ```python + input_data = np.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone + model = {name}Backbone() + output = model(input_data) + ``` +""" # noqa: E501 + + +class EfficientNetLiteB0Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b0", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB1Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b1", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB2Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b2", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB3Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b3", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetLiteB4Backbone(EfficientNetLiteBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + 
kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b4", **kwargs + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +setattr( + EfficientNetLiteB0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB0"), +) +setattr( + EfficientNetLiteB1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB1"), +) +setattr( + EfficientNetLiteB2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB2"), +) +setattr( + EfficientNetLiteB3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB3"), +) +setattr( + EfficientNetLiteB4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetLiteB4"), +) diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py new file mode 100644 index 0000000000..d3a6fd8815 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py @@ -0,0 +1,366 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""EfficientNet Lite backbone model. + +Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) +""" # noqa: E501 + +import copy +import math + +from keras_cv.backend import keras +from keras_cv.models import utils +from keras_cv.models.backbones.backbone import Backbone +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.utils.python_utils import classproperty + +BN_AXIS = 3 + + +@keras.saving.register_keras_serializable(package="keras_cv.models") +class EfficientNetLiteBackbone(Backbone): + """Instantiates the EfficientNetLite architecture using given scaling + coefficients. + + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) + + Args: + include_rescaling: whether to rescale the inputs. If set to True, + inputs will be passed through a `Rescaling(1/255.0)` layer. + width_coefficient: float, scaling coefficient for network width. + depth_coefficient: float, scaling coefficient for network depth. + dropout_rate: float, dropout rate before final classifier layer. + drop_connect_rate: float, dropout rate at skip connections. The + default value is set to 0.2. 
+ depth_divisor: integer, a unit of network width. The default value + is set to 8. + activation: activation function. + input_shape: optional shape tuple, + It should have exactly 3 inputs channels. + input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) + to use as image input for the model. + + Usage: + ```python + # Construct an EfficientNetLite from a preset: + efficientnet = models.EfficientNetLiteBackbone.from_preset( + "efficientnetlite_b0" + ) + images = np.ones((1, 256, 256, 3)) + outputs = efficientnet.predict(images) + + # Alternatively, you can also customize the EfficientNetLite architecture: + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + images = np.ones((1, 256, 256, 3)) + outputs = model.predict(images) + ``` + """ # noqa: E501 + + def __init__( + self, + *, + include_rescaling, + width_coefficient, + depth_coefficient, + stackwise_kernel_sizes, + stackwise_num_repeats, + stackwise_input_filters, + stackwise_output_filters, + stackwise_expansion_ratios, + stackwise_strides, + dropout_rate=0.2, + drop_connect_rate=0.2, + depth_divisor=8, + input_shape=(None, None, 3), + input_tensor=None, + activation="relu6", + **kwargs, + ): + img_input = utils.parse_model_inputs(input_shape, input_tensor) + + # Build stem + x = img_input + + if include_rescaling: + # Use common rescaling strategy across keras_cv + x = keras.layers.Rescaling(1.0 / 255.0)(x) + + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad" + )(x) + x = keras.layers.Conv2D( + 32, + 3, + strides=2, + padding="valid", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name="stem_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x) + x = keras.layers.Activation(activation, name="stem_activation")(x) + + # Build blocks + block_id = 0 + blocks = float(sum(stackwise_num_repeats)) + + pyramid_level_inputs = [] + + for i in range(len(stackwise_kernel_sizes)): + num_repeats = stackwise_num_repeats[i] + input_filters = stackwise_input_filters[i] + output_filters = stackwise_output_filters[i] + # Update block input and output filters based on depth multiplier. + input_filters = round_filters( + filters=input_filters, + width_coefficient=width_coefficient, + depth_divisor=depth_divisor, + ) + output_filters = round_filters( + filters=output_filters, + width_coefficient=width_coefficient, + depth_divisor=depth_divisor, + ) + + if i == 0 or i == (len(stackwise_kernel_sizes) - 1): + repeats = num_repeats + else: + repeats = round_repeats( + repeats=num_repeats, + depth_coefficient=depth_coefficient, + ) + strides = stackwise_strides[i] + + for j in range(repeats): + # The first block needs to take care of stride and filter size + # increase. + if j > 0: + strides = 1 + input_filters = output_filters + + if strides != 1: + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # 97 is the start of the lowercase alphabet. 
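+                # chr(j + 97) therefore yields "a", "b", "c", ..., giving each
+                # repeated block within a stack a distinct name suffix.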
+ letter_identifier = chr(j + 97) + x = apply_efficient_net_lite_block( + inputs=x, + filters_in=input_filters, + filters_out=output_filters, + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + expand_ratio=stackwise_expansion_ratios[i], + activation=activation, + dropout_rate=drop_connect_rate * block_id / blocks, + name="block{}{}_".format(i + 1, letter_identifier), + ) + block_id += 1 + + # Build top + x = keras.layers.Conv2D( + 1280, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name="top_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x) + x = keras.layers.Activation(activation, name="top_activation")(x) + + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # Create model. + super().__init__(inputs=img_input, outputs=x, **kwargs) + + self.include_rescaling = include_rescaling + self.width_coefficient = width_coefficient + self.depth_coefficient = depth_coefficient + self.dropout_rate = dropout_rate + self.drop_connect_rate = drop_connect_rate + self.depth_divisor = depth_divisor + self.activation = activation + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + self.stackwise_kernel_sizes = stackwise_kernel_sizes + self.stackwise_num_repeats = stackwise_num_repeats + self.stackwise_input_filters = stackwise_input_filters + self.stackwise_output_filters = stackwise_output_filters + self.stackwise_expansion_ratios = stackwise_expansion_ratios + self.stackwise_strides = stackwise_strides + + def get_config(self): + config = super().get_config() + config.update( + { + "include_rescaling": self.include_rescaling, + "width_coefficient": self.width_coefficient, + "depth_coefficient": self.depth_coefficient, + "dropout_rate": self.dropout_rate, + "drop_connect_rate": self.drop_connect_rate, + "depth_divisor": self.depth_divisor, + "activation": self.activation, + "input_tensor": self.input_tensor, + "input_shape": self.input_shape[1:], + "stackwise_kernel_sizes": self.stackwise_kernel_sizes, + "stackwise_num_repeats": self.stackwise_num_repeats, + "stackwise_input_filters": self.stackwise_input_filters, + "stackwise_output_filters": self.stackwise_output_filters, + "stackwise_expansion_ratios": self.stackwise_expansion_ratios, + "stackwise_strides": self.stackwise_strides, + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + +def conv_kernel_initializer(scale=2.0): + return keras.initializers.VarianceScaling( + scale=scale, mode="fan_out", distribution="truncated_normal" + ) + + +def round_filters(filters, depth_divisor, width_coefficient): + """Round number of filters based on depth multiplier.""" + filters *= width_coefficient + new_filters = max( + depth_divisor, + int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, + ) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += depth_divisor + return int(new_filters) + + +def round_repeats(repeats, depth_coefficient): + """Round number of repeats based on depth multiplier.""" + return int(math.ceil(depth_coefficient * repeats)) + + +def apply_efficient_net_lite_block( + inputs, + activation="relu6", + dropout_rate=0.0, + name=None, + filters_in=32, + filters_out=16, + kernel_size=3, + strides=1, + expand_ratio=1, +): + """An inverted residual block, without SE phase. 
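+    The squeeze-and-excitation phase of the standard EfficientNet block is
+    omitted in the Lite variant.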
+ + Args: + inputs: input tensor. + activation: activation function. + dropout_rate: float between 0 and 1, fraction of the input units to drop. + name: string, block label. + filters_in: integer, the number of input filters. + filters_out: integer, the number of output filters. + kernel_size: integer, the dimension of the convolution window. + strides: integer, the stride of the convolution. + expand_ratio: integer, scaling coefficient for the input filters. + + Returns: + output tensor for the block. + """ # noqa: E501 + if name is None: + name = f"block_{keras.backend.get_uid('block_')}_" + + # Expansion phase + filters = filters_in * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "expand_conv", + )(inputs) + x = keras.layers.BatchNormalization( + axis=BN_AXIS, name=name + "expand_bn" + )(x) + x = keras.layers.Activation( + activation, name=name + "expand_activation" + )(x) + else: + x = inputs + + # Depthwise Convolution + if strides == 2: + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, kernel_size), + name=name + "dwconv_pad", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + x = keras.layers.DepthwiseConv2D( + kernel_size, + strides=strides, + padding=conv_pad, + use_bias=False, + depthwise_initializer=conv_kernel_initializer(), + name=name + "dwconv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x) + x = keras.layers.Activation(activation, name=name + "activation")(x) + + # Output phase + x = keras.layers.Conv2D( + filters_out, + 1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "project_conv", + )(x) + x = keras.layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")( + x + ) + if strides == 1 and filters_in == filters_out: + if dropout_rate > 0: + x = keras.layers.Dropout( + dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" + )(x) + x = keras.layers.add([x, inputs], name=name + "add") + return x diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py new file mode 100644 index 0000000000..db9838e3de --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets.py @@ -0,0 +1,175 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""EfficientNetLite model preset configurations.""" + +backbone_presets_no_weights = { + "efficientnetlite_b0": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.0`." 
+ ), + "params": 3414176, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b1": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.1`." + ), + "params": 4190496, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b2": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.1` and `depth_coefficient=1.2`." + ), + "params": 4870320, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.1, + "depth_coefficient": 1.2, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b3": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.2` and `depth_coefficient=1.4`." 
+ ), + "params": 6994504, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, + "efficientnetlite_b4": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.4` and `depth_coefficient=1.8`." + ), + "params": 11840256, + "official_name": "EfficientNetLite", + "path": "EfficientNetLite", + }, + "class_name": "keras_cv.models>EfficientNetLiteBackbone", + "config": { + "width_coefficient": 1.4, + "depth_coefficient": 1.8, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "relu6", + }, + }, +} + +backbone_presets_with_weights = {} + +backbone_presets = { + **backbone_presets_no_weights, + **backbone_presets_with_weights, +} diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py new file mode 100644 index 0000000000..d4e783141e --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py @@ -0,0 +1,60 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +@pytest.mark.extra_large +class EfficientNetLitePresetFullTest(TestCase): + """ + Test the full enumeration of our preset. + This tests every preset for EfficientNetLite and is only run manually. 
+ Run with: + `pytest keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_presets_test.py --run_extra_large` + """ # noqa: E501 + + @parameterized.named_parameters( + *[(preset, preset) for preset in EfficientNetLiteBackbone.presets] + ) + def test_load_efficientnetlite(self, preset): + input_data = np.ones(shape=(2, 224, 224, 3)) + model = EfficientNetLiteBackbone.from_preset(preset) + model(input_data) + + def test_efficientnetlite_feature_extractor(self): + model = EfficientNetLiteB0Backbone( + include_rescaling=False, + input_shape=[256, 256, 3], + ) + levels = ["P3", "P4"] + layer_names = [model.pyramid_level_inputs[level] for level in levels] + backbone_model = get_feature_extractor(model, layer_names, levels) + inputs = keras.Input(shape=[256, 256, 3]) + outputs = backbone_model(inputs) + self.assertLen(outputs, 2) + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals(outputs["P3"].shape[:3], (None, 32, 32)) + self.assertEquals(outputs["P4"].shape[:3], (None, 16, 16)) diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py new file mode 100644 index 0000000000..195e6ea0cf --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone_test.py @@ -0,0 +1,162 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_aliases import ( # noqa: E501 + EfficientNetLiteB0Backbone, +) +from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 + EfficientNetLiteBackbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +class EfficientNetLiteBackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(8, 224, 224, 3)) + + def test_valid_call(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + model(self.input_batch) + + def test_valid_call_alias_model_with_rescaling(self): + model = EfficientNetLiteB0Backbone(include_rescaling=True) + model(self.input_batch) + + def test_valid_call_with_rescaling(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_lite_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, EfficientNetLiteBackbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_alias_model(self): + model = EfficientNetLiteB0Backbone() + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_lite_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + # Note that these aliases serialized as the base class + self.assertIsInstance(restored_model, EfficientNetLiteBackbone) + + # Check that output matches. 
+ restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + def test_feature_pyramid_inputs(self): + model = EfficientNetLiteB0Backbone() + backbone_model = get_feature_extractor( + model, + model.pyramid_level_inputs.values(), + model.pyramid_level_inputs.keys(), + ) + input_size = 256 + inputs = keras.Input(shape=[input_size, input_size, 3]) + outputs = backbone_model(inputs) + levels = ["P1", "P2", "P3", "P4", "P5"] + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals( + outputs["P1"].shape, + (None, input_size // 2**1, input_size // 2**1, 16), + ) + self.assertEquals( + outputs["P2"].shape, + (None, input_size // 2**2, input_size // 2**2, 24), + ) + self.assertEquals( + outputs["P3"].shape, + (None, input_size // 2**3, input_size // 2**3, 40), + ) + self.assertEquals( + outputs["P4"].shape, + (None, input_size // 2**4, input_size // 2**4, 112), + ) + self.assertEquals( + outputs["P5"].shape, + (None, input_size // 2**5, input_size // 2**5, 1280), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = EfficientNetLiteBackbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + self.assertEqual(model.output_shape, (None, None, None, 1280)) diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py index 794687b9c7..20df5826f0 100644 --- a/keras_cv/models/legacy/__init__.py +++ b/keras_cv/models/legacy/__init__.py @@ -24,11 +24,6 @@ from keras_cv.models.legacy.convnext import ConvNeXtXLarge from keras_cv.models.legacy.darknet import DarkNet21 from keras_cv.models.legacy.darknet import DarkNet53 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB0 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB1 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB2 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB3 -from keras_cv.models.legacy.efficientnet_lite import EfficientNetLiteB4 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1 from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2 diff --git a/keras_cv/models/legacy/efficientnet_lite.py b/keras_cv/models/legacy/efficientnet_lite.py deleted file mode 100644 index a2ae8d3606..0000000000 --- a/keras_cv/models/legacy/efficientnet_lite.py +++ /dev/null @@ -1,678 +0,0 @@ -# Copyright 2023 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""EfficientNet Lite models for Keras. 
- -Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - [Based on the original EfficientNet Lite's](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) -""" # noqa: E501 - -import copy -import math - -import tensorflow as tf -from keras import backend -from keras import layers -from tensorflow import keras - -from keras_cv.models.legacy import utils -from keras_cv.models.legacy.weights import parse_weights -from keras_cv.models.utils import correct_pad_downsample - -DEFAULT_BLOCKS_ARGS = [ - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 32, - "filters_out": 16, - "expand_ratio": 1, - "id_skip": True, - "strides": 1, - }, - { - "kernel_size": 3, - "repeats": 2, - "filters_in": 16, - "filters_out": 24, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 5, - "repeats": 2, - "filters_in": 24, - "filters_out": 40, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 3, - "repeats": 3, - "filters_in": 40, - "filters_out": 80, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 5, - "repeats": 3, - "filters_in": 80, - "filters_out": 112, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - }, - { - "kernel_size": 5, - "repeats": 4, - "filters_in": 112, - "filters_out": 192, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - }, - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 192, - "filters_out": 320, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - }, -] -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - -DENSE_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 1.0 / 3.0, - "mode": "fan_out", - "distribution": "uniform", - }, -} - -BASE_DOCSTRING = """Instantiates the {name} architecture. - - Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - This function returns a Keras {name} model. - - For image classification use cases, see [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). - - Args: - include_rescaling: bool, whether to rescale the inputs. If set - to `True`, inputs will be passed through a `Rescaling(1/255.0)` - layer. - include_top: bool, whether to include the fully-connected layer at the - top of the network. If provided, `num_classes` must be provided. - num_classes: optional int, number of classes to classify images into - (only to be specified if `include_top` is `True`). - weights: one of `None` (random initialization), a pretrained weight file - path, or a reference to pre-trained weights (e.g. - 'imagenet/classification')(see available pre-trained weights in - weights.py) - input_shape: optional shape tuple, defaults to (None, None, 3). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional block. 
- - `avg` means that global average pooling will be applied to the - output of the last convolutional block, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. - name: (Optional) name to pass to the model, defaults to "{name}". - - Returns: - A `keras.Model` instance. -""" # noqa: E501 - -BN_AXIS = 3 - - -def round_filters(filters, depth_divisor, width_coefficient): - """Round number of filters based on depth multiplier.""" - filters *= width_coefficient - new_filters = max( - depth_divisor, - int(filters + depth_divisor / 2) // depth_divisor * depth_divisor, - ) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += depth_divisor - return int(new_filters) - - -def round_repeats(repeats, depth_coefficient): - """Round number of repeats based on depth multiplier.""" - return int(math.ceil(depth_coefficient * repeats)) - - -def apply_efficient_net_lite_block( - inputs, - activation="relu6", - drop_rate=0.0, - name=None, - filters_in=32, - filters_out=16, - kernel_size=3, - strides=1, - expand_ratio=1, - id_skip=True, -): - """An inverted residual block, without SE phase. - - Args: - inputs: input tensor. - activation: activation function. - drop_rate: float between 0 and 1, fraction of the input units to drop. - name: string, block label. - filters_in: integer, the number of input filters. - filters_out: integer, the number of output filters. - kernel_size: integer, the dimension of the convolution window. - strides: integer, the stride of the convolution. - expand_ratio: integer, scaling coefficient for the input filters. - id_skip: boolean. - - Returns: - output tensor for the block. 
- """ - if name is None: - name = f"block_{backend.get_uid('block_')}_" - - # Expansion phase - filters = filters_in * expand_ratio - if expand_ratio != 1: - x = layers.Conv2D( - filters, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "expand_conv", - )(inputs) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "expand_bn")(x) - x = layers.Activation(activation, name=name + "expand_activation")(x) - else: - x = inputs - - # Depthwise Convolution - if strides == 2: - x = layers.ZeroPadding2D( - padding=correct_pad_downsample(x, kernel_size), - name=name + "dwconv_pad", - )(x) - conv_pad = "valid" - else: - conv_pad = "same" - x = layers.DepthwiseConv2D( - kernel_size, - strides=strides, - padding=conv_pad, - use_bias=False, - depthwise_initializer=CONV_KERNEL_INITIALIZER, - name=name + "dwconv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "bn")(x) - x = layers.Activation(activation, name=name + "activation")(x) - - # Skip SE block - # Output phase - x = layers.Conv2D( - filters_out, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "project_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "project_bn")(x) - if id_skip and strides == 1 and filters_in == filters_out: - if drop_rate > 0: - x = layers.Dropout( - drop_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" - )(x) - x = layers.add([x, inputs], name=name + "add") - return x - - -@keras.utils.register_keras_serializable(package="keras_cv.models") -class EfficientNetLite(keras.Model): - """Instantiates the EfficientNetLite architecture using given scaling - coefficients. - - Args: - include_rescaling: whether to rescale the inputs. If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: whether to include the fully-connected - layer at the top of the network. - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. - model_name: string, model name. - weights: one of `None` (random initialization), - or the path to the weights file to be loaded. - input_shape: optional shape tuple, - It should have exactly 3 inputs channels. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - num_classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. 
- - Returns: - A `keras.Model` instance. - - Raises: - ValueError: if `blocks_args` is invalid. - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - ValueError: if `classifier_activation` is not `softmax` or `None` - when using a pretrained top layer. - """ - - def __init__( - self, - include_rescaling, - include_top, - width_coefficient, - depth_coefficient, - default_size, - dropout_rate=0.2, - drop_connect_rate=0.2, - depth_divisor=8, - activation="relu6", - blocks_args=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - num_classes=None, - classifier_activation="softmax", - **kwargs, - ): - if blocks_args is None: - blocks_args = DEFAULT_BLOCKS_ARGS - if not isinstance(blocks_args, list): - raise ValueError( - "The `blocks_args` argument should be either `None` or valid" - "list of dicts for building blocks. " - f"Received: blocks_args={blocks_args}" - ) - intact_blocks_args = copy.deepcopy(blocks_args) # for configs - blocks_args = copy.deepcopy(blocks_args) - - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to " - "the weights file to be loaded. " - f"Weights file not found at location: {weights}" - ) - - if include_top and not num_classes: - raise ValueError( - "If `include_top` is True, you should specify `num_classes`. " - f"Received: num_classes={num_classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. " - ) - - img_input = utils.parse_model_inputs(input_shape, input_tensor) - - # Build stem - x = img_input - - if include_rescaling: - # Use common rescaling strategy across keras_cv - x = layers.Rescaling(1.0 / 255.0)(x) - - x = layers.ZeroPadding2D( - padding=correct_pad_downsample(x, 3), name="stem_conv_pad" - )(x) - x = layers.Conv2D( - 32, - 3, - strides=2, - padding="valid", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name="stem_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name="stem_bn")(x) - x = layers.Activation(activation, name="stem_activation")(x) - - # Build blocks - b = 0 - blocks = float(sum(args["repeats"] for args in blocks_args)) - - for i, args in enumerate(blocks_args): - assert args["repeats"] > 0 - # Update block input and output filters based on depth multiplier. - args["filters_in"] = round_filters( - filters=args["filters_in"], - width_coefficient=width_coefficient, - depth_divisor=depth_divisor, - ) - args["filters_out"] = round_filters( - filters=args["filters_out"], - width_coefficient=width_coefficient, - depth_divisor=depth_divisor, - ) - - if i == 0 or i == (len(blocks_args) - 1): - repeats = args.pop("repeats") - else: - repeats = round_repeats( - repeats=args.pop("repeats"), - depth_coefficient=depth_coefficient, - ) - - for j in range(repeats): - # The first block needs to take care of stride and filter size - # increase. 
- if j > 0: - args["strides"] = 1 - args["filters_in"] = args["filters_out"] - x = apply_efficient_net_lite_block( - x, - activation=activation, - drop_rate=drop_connect_rate * b / blocks, - name="block{}{}_".format(i + 1, chr(j + 97)), - **args, - ) - - b += 1 - - # Build top - x = layers.Conv2D( - 1280, - 1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name="top_conv", - )(x) - x = layers.BatchNormalization(axis=BN_AXIS, name="top_bn")(x) - x = layers.Activation(activation, name="top_activation")(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - x = layers.Dense( - num_classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - name="predictions", - )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - inputs = img_input - - # Create model. - super().__init__(inputs=inputs, outputs=x, **kwargs) - - # Load weights. - if weights is not None: - self.load_weights(weights) - - self.include_rescaling = include_rescaling - self.include_top = include_top - self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient - self.default_size = default_size - self.dropout_rate = dropout_rate - self.drop_connect_rate = drop_connect_rate - self.depth_divisor = depth_divisor - self.activation = activation - self.blocks_args = intact_blocks_args - self.input_tensor = input_tensor - self.pooling = pooling - self.num_classes = num_classes - self.classifier_activation = classifier_activation - - def get_config(self): - return { - "include_rescaling": self.include_rescaling, - "include_top": self.include_top, - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, - "default_size": self.default_size, - "dropout_rate": self.dropout_rate, - "drop_connect_rate": self.drop_connect_rate, - "depth_divisor": self.depth_divisor, - "activation": self.activation, - "blocks_args": self.blocks_args, - # Remove batch dimension from `input_shape` - "input_shape": self.input_shape[1:], - "input_tensor": self.input_tensor, - "pooling": self.pooling, - "num_classes": self.num_classes, - "classifier_activation": self.classifier_activation, - "name": self.name, - "trainable": self.trainable, - } - - @classmethod - def from_config(cls, config): - return cls(**config) - - -def EfficientNetLiteB0( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=224, - dropout_rate=0.2, - name="efficientnetliteb0", - weights=parse_weights(weights, include_top, "efficientnetliteb0"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB1( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.1, - default_size=240, - dropout_rate=0.2, - 
name="efficientnetliteb1", - weights=parse_weights(weights, include_top, "efficientnetliteb1"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB2( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.1, - depth_coefficient=1.2, - default_size=260, - dropout_rate=0.3, - name="efficientnetliteb2", - weights=parse_weights(weights, include_top, "efficientnetliteb2"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB3( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.2, - depth_coefficient=1.4, - default_size=280, - dropout_rate=0.3, - name="efficientnetliteb3", - weights=parse_weights(weights, include_top, "efficientnetliteb3"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetLiteB4( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - **kwargs, -): - return EfficientNetLite( - include_rescaling, - include_top, - width_coefficient=1.4, - depth_coefficient=1.8, - default_size=300, - dropout_rate=0.3, - name="efficientnetliteb4", - weights=parse_weights(weights, include_top, "efficientnetliteb4"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -EfficientNetLiteB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB0") -EfficientNetLiteB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB1") -EfficientNetLiteB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB2") -EfficientNetLiteB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB3") -EfficientNetLiteB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetLiteB4") diff --git a/keras_cv/models/legacy/efficientnet_lite_test.py b/keras_cv/models/legacy/efficientnet_lite_test.py deleted file mode 100644 index daa1d0e2c0..0000000000 --- a/keras_cv/models/legacy/efficientnet_lite_test.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2023 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from absl.testing import parameterized - -from keras_cv.models.legacy import efficientnet_lite -from keras_cv.tests.test_case import TestCase - -from .models_test import ModelsTest - -MODEL_LIST = [ - (efficientnet_lite.EfficientNetLiteB0, 1280, {}), -] - -""" -Below are other configurations that we omit from our CI but that can/should -be tested manually when making changes to this model. -(efficientnet_lite.EfficientNetLiteB1, 1280, {}), -(efficientnet_lite.EfficientNetLiteB2, 1280, {}), -(efficientnet_lite.EfficientNetLiteB3, 1280, {}), -(efficientnet_lite.EfficientNetLiteB4, 1280, {}), -""" - - -class EfficientNetLiteTest(ModelsTest, TestCase): - @parameterized.parameters(*MODEL_LIST) - def test_application_base(self, app, _, args): - super()._test_application_base(app, _, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_with_rescaling(self, app, last_dim, args): - super()._test_application_with_rescaling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_pooling(self, app, last_dim, args): - super()._test_application_pooling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_variable_input_channels(self, app, last_dim, args): - super()._test_application_variable_input_channels(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) From 1602b17eed20b187e6bb0f4656f1fbeff57947b2 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Thu, 17 Aug 2023 20:47:24 +0530 Subject: [PATCH 06/17] Migrate Efficientnetv1 to Backbone (#1716) * created new files * moved old files to new one * efficientnetv1 backbone, presets, and imports updated * fix imports * fix imports * added preset unit test * updated backbone * fix init * fix legacy init * fix aliases presets * fix backbone round filter argument * fix depthwise conv * fix conv kernel initializer * fix depthwiseconv * fix scope name match pattern error * fix scope name * fix block name * remove block args preset * remove model_name * remove default_size from preset * updated test cases * updated docs * fix id_skip * fix test * fix format * reviewd comments * fix format * fix typo * fix naming * test with layer-matching * fix typo * fix format * backbone test updated * review changes * port * fix port * fix port 2 * port: fix argument * port: final fix * port:docs typo * port: update * review comment * format --- keras_cv/models/__init__.py | 27 + keras_cv/models/backbones/backbone_presets.py | 5 + .../backbones/efficientnet_v1/__init__.py | 13 + .../efficientnet_v1_aliases.py | 315 ++++++ .../efficientnet_v1_backbone.py | 454 +++++++++ .../efficientnet_v1_backbone_presets.py | 337 +++++++ .../efficientnet_v1_backbone_presets_test.py | 60 ++ .../efficientnet_v1_backbone_test.py | 198 ++++ keras_cv/models/legacy/__init__.py | 8 - keras_cv/models/legacy/efficientnet_v1.py | 937 ------------------ .../models/legacy/efficientnet_v1_test.py | 58 -- 11 files changed, 1409 insertions(+), 1003 deletions(-) create mode 100644 keras_cv/models/backbones/efficientnet_v1/__init__.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py create mode 100644 
keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py delete mode 100644 keras_cv/models/legacy/efficientnet_v1.py delete mode 100644 keras_cv/models/legacy/efficientnet_v1_test.py diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 1861b49c03..4191c07575 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -61,6 +61,33 @@ from keras_cv.models.backbones.efficientnet_lite.efficientnet_lite_backbone import ( # noqa: E501 EfficientNetLiteBackbone, ) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B0Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B1Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B2Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B3Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B4Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B5Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B6Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B7Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1Backbone, +) from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2B0Backbone, ) diff --git a/keras_cv/models/backbones/backbone_presets.py b/keras_cv/models/backbones/backbone_presets.py index 3852b28877..614f85cd24 100644 --- a/keras_cv/models/backbones/backbone_presets.py +++ b/keras_cv/models/backbones/backbone_presets.py @@ -19,6 +19,9 @@ from keras_cv.models.backbones.efficientnet_lite import ( efficientnet_lite_backbone_presets, ) +from keras_cv.models.backbones.efficientnet_v1 import ( + efficientnet_v1_backbone_presets, +) from keras_cv.models.backbones.efficientnet_v2 import ( efficientnet_v2_backbone_presets, ) @@ -32,6 +35,7 @@ **resnet_v2_backbone_presets.backbone_presets_no_weights, **mobilenet_v3_backbone_presets.backbone_presets_no_weights, **csp_darknet_backbone_presets.backbone_presets_no_weights, + **efficientnet_v1_backbone_presets.backbone_presets_no_weights, **efficientnet_v2_backbone_presets.backbone_presets_no_weights, **densenet_backbone_presets.backbone_presets_no_weights, **efficientnet_lite_backbone_presets.backbone_presets_no_weights, @@ -43,6 +47,7 @@ **resnet_v2_backbone_presets.backbone_presets_with_weights, **mobilenet_v3_backbone_presets.backbone_presets_with_weights, **csp_darknet_backbone_presets.backbone_presets_with_weights, + **efficientnet_v1_backbone_presets.backbone_presets_with_weights, **efficientnet_v2_backbone_presets.backbone_presets_with_weights, **densenet_backbone_presets.backbone_presets_with_weights, **efficientnet_lite_backbone_presets.backbone_presets_with_weights, diff --git a/keras_cv/models/backbones/efficientnet_v1/__init__.py b/keras_cv/models/backbones/efficientnet_v1/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py new file mode 100644 index 0000000000..587c0e70ff --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_aliases.py @@ -0,0 +1,315 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import ( + EfficientNetV1Backbone, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """Instantiates the {name} architecture. + + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + (ICML 2019) + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. 
+""" # noqa: E501 + + +class EfficientNetV1B0Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b0", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B1Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b1", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B2Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b2", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B3Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b3", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B4Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b4", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B5Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": 
include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b5", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B6Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b6", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +class EfficientNetV1B7Backbone(EfficientNetV1Backbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(None, None, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return EfficientNetV1Backbone.from_preset("efficientnetv1_b7", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return {} + + +setattr( + EfficientNetV1B0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B0"), +) +setattr( + EfficientNetV1B1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B1"), +) +setattr( + EfficientNetV1B2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B2"), +) +setattr( + EfficientNetV1B3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B3"), +) +setattr( + EfficientNetV1B4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B4"), +) +setattr( + EfficientNetV1B5Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B5"), +) +setattr( + EfficientNetV1B6Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B6"), +) +setattr( + EfficientNetV1B7Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV1B7"), +) diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py new file mode 100644 index 0000000000..cc39d3d31c --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py @@ -0,0 +1,454 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import copy
+import math
+
+from keras_cv.backend import keras
+from keras_cv.models import utils
+from keras_cv.models.backbones.backbone import Backbone
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone_presets import (  # noqa: E501
+    backbone_presets,
+)
+from keras_cv.utils.python_utils import classproperty
+
+
+@keras.saving.register_keras_serializable(package="keras_cv.models")
+class EfficientNetV1Backbone(Backbone):
+    """Instantiates the EfficientNetV1 architecture.
+
+    Reference:
+    - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+    (ICML 2019)
+    - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py)
+
+    Args:
+        include_rescaling: bool, whether to rescale the inputs. If set to
+            True, inputs will be passed through a `Rescaling(1/255.0)` layer.
+        width_coefficient: float, scaling coefficient for network width.
+        depth_coefficient: float, scaling coefficient for network depth.
+        dropout_rate: float, dropout rate before final classifier layer.
+        drop_connect_rate: float, dropout rate at skip connections. The default
+            value is set to 0.2.
+        depth_divisor: integer, a unit of network width. The default value is
+            set to 8.
+        activation: activation function to use between each convolutional layer.
+        input_shape: optional shape tuple, it should have exactly 3 input
+            channels.
+        input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) to
+            use as image input for the model.
+        stackwise_kernel_sizes: list of ints, the kernel sizes used for each
+            conv block.
+        stackwise_num_repeats: list of ints, number of times to repeat each
+            conv block.
+        stackwise_input_filters: list of ints, number of input filters for
+            each conv block.
+        stackwise_output_filters: list of ints, number of output filters for
+            each stack in the conv blocks model.
+        stackwise_expansion_ratios: list of floats, the expansion ratio applied
+            to the input filters of each conv block.
+        stackwise_strides: list of ints, strides for each conv block.
+        stackwise_squeeze_and_excite_ratios: list of floats, the squeeze and
+            excite ratios passed to the squeeze and excitation blocks.
+
+    Usage:
+    ```python
+    # Construct an EfficientNetV1 from a preset:
+    efficientnet = keras_cv.models.EfficientNetV1Backbone.from_preset(
+        "efficientnetv1_b0"
+    )
+    images = np.ones((1, 256, 256, 3))
+    outputs = efficientnet.predict(images)
+
+    # Alternatively, you can also customize the EfficientNetV1 architecture:
+    model = EfficientNetV1Backbone(
+        stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3],
+        stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1],
+        stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192],
+        stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320],
+        stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6],
+        stackwise_strides=[1, 2, 2, 2, 1, 2, 1],
+        stackwise_squeeze_and_excite_ratios=[
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+            0.25,
+        ],
+        width_coefficient=1.0,
+        depth_coefficient=1.0,
+        include_rescaling=False,
+    )
+    images = np.ones((1, 256, 256, 3))
+    outputs = model.predict(images)
+    ```
+    """  # noqa: E501
+
+    def __init__(
+        self,
+        *,
+        include_rescaling,
+        width_coefficient,
+        depth_coefficient,
+        stackwise_kernel_sizes,
+        stackwise_num_repeats,
+        stackwise_input_filters,
+        stackwise_output_filters,
+        stackwise_expansion_ratios,
+        stackwise_strides,
+        stackwise_squeeze_and_excite_ratios,
+        dropout_rate=0.2,
+        drop_connect_rate=0.2,
+        depth_divisor=8,
+        input_shape=(None, None, 3),
+        input_tensor=None,
+        activation="swish",
+        **kwargs,
+    ):
+        img_input = utils.parse_model_inputs(input_shape, input_tensor)
+
+        x = img_input
+
+        if include_rescaling:
+            # Use common rescaling strategy across keras_cv
+            x = keras.layers.Rescaling(1.0 / 255.0)(x)
+
+        x = keras.layers.ZeroPadding2D(
+            padding=utils.correct_pad_downsample(x, 3), name="stem_conv_pad"
+        )(x)
+
+        # Build stem
+        stem_filters = round_filters(
+            filters=stackwise_input_filters[0],
+            width_coefficient=width_coefficient,
+            divisor=depth_divisor,
+        )
+        x = keras.layers.Conv2D(
+            filters=stem_filters,
+            kernel_size=3,
+            strides=2,
+            padding="valid",
+            use_bias=False,
+            kernel_initializer=conv_kernel_initializer(),
+            name="stem_conv",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=3,
+            name="stem_bn",
+        )(x)
+        x = keras.layers.Activation(activation, name="stem_activation")(x)
+
+        # Build blocks
+        block_id = 0
+        blocks = float(sum(stackwise_num_repeats))
+
+        pyramid_level_inputs = []
+        for i in range(len(stackwise_kernel_sizes)):
+            num_repeats = stackwise_num_repeats[i]
+            input_filters = stackwise_input_filters[i]
+            output_filters = stackwise_output_filters[i]
+
+            # Update block input and output filters based on depth multiplier.
+            input_filters = round_filters(
+                filters=input_filters,
+                width_coefficient=width_coefficient,
+                divisor=depth_divisor,
+            )
+            output_filters = round_filters(
+                filters=output_filters,
+                width_coefficient=width_coefficient,
+                divisor=depth_divisor,
+            )
+
+            repeats = round_repeats(
+                repeats=num_repeats,
+                depth_coefficient=depth_coefficient,
+            )
+            strides = stackwise_strides[i]
+            squeeze_and_excite_ratio = stackwise_squeeze_and_excite_ratios[i]
+
+            for j in range(repeats):
+                # The first block needs to take care of stride and filter size
+                # increase.
+                if j > 0:
+                    strides = 1
+                    input_filters = output_filters
+
+                if strides != 1:
+                    pyramid_level_inputs.append(utils.get_tensor_input_name(x))
+
+                # 97 is the start of the lowercase alphabet.
+ letter_identifier = chr(j + 97) + x = apply_efficientnet_block( + inputs=x, + filters_in=input_filters, + filters_out=output_filters, + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + expand_ratio=stackwise_expansion_ratios[i], + se_ratio=squeeze_and_excite_ratio, + activation=activation, + dropout_rate=drop_connect_rate * block_id / blocks, + name="block{}{}_".format(i + 1, letter_identifier), + ) + block_id += 1 + + # Build top + top_filters = round_filters( + filters=1280, + width_coefficient=width_coefficient, + divisor=depth_divisor, + ) + + x = keras.layers.Conv2D( + filters=top_filters, + kernel_size=1, + padding="same", + strides=1, + kernel_initializer=conv_kernel_initializer(), + use_bias=False, + name="top_conv", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name="top_bn", + )(x) + x = keras.layers.Activation( + activation=activation, name="top_activation" + )(x) + + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + # Create model. + super().__init__(inputs=img_input, outputs=x, **kwargs) + + self.include_rescaling = include_rescaling + self.width_coefficient = width_coefficient + self.depth_coefficient = depth_coefficient + self.dropout_rate = dropout_rate + self.drop_connect_rate = drop_connect_rate + self.depth_divisor = depth_divisor + self.activation = activation + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + self.stackwise_kernel_sizes = stackwise_kernel_sizes + self.stackwise_num_repeats = stackwise_num_repeats + self.stackwise_input_filters = stackwise_input_filters + self.stackwise_output_filters = stackwise_output_filters + self.stackwise_expansion_ratios = stackwise_expansion_ratios + self.stackwise_strides = stackwise_strides + self.stackwise_squeeze_and_excite_ratios = ( + stackwise_squeeze_and_excite_ratios + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "include_rescaling": self.include_rescaling, + "width_coefficient": self.width_coefficient, + "depth_coefficient": self.depth_coefficient, + "dropout_rate": self.dropout_rate, + "drop_connect_rate": self.drop_connect_rate, + "depth_divisor": self.depth_divisor, + "activation": self.activation, + "input_tensor": self.input_tensor, + "input_shape": self.input_shape[1:], + "trainable": self.trainable, + "stackwise_kernel_sizes": self.stackwise_kernel_sizes, + "stackwise_num_repeats": self.stackwise_num_repeats, + "stackwise_input_filters": self.stackwise_input_filters, + "stackwise_output_filters": self.stackwise_output_filters, + "stackwise_expansion_ratios": self.stackwise_expansion_ratios, + "stackwise_strides": self.stackwise_strides, + "stackwise_squeeze_and_excite_ratios": ( + self.stackwise_squeeze_and_excite_ratios + ), + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + +def conv_kernel_initializer(scale=2.0): + return keras.initializers.VarianceScaling( + scale=scale, mode="fan_out", distribution="truncated_normal" + ) + + +def round_filters(filters, width_coefficient, divisor): + """Round number of filters based on depth multiplier. 
+ + Args: + filters: int, number of filters for Conv layer + width_coefficient: float, denotes the scaling coefficient of network + width + divisor: int, a unit of network width + + Returns: + int, new rounded filters value for Conv layer + """ + filters *= width_coefficient + new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, depth_coefficient): + """Round number of repeats based on depth multiplier. + + Args: + repeats: int, number of repeats of efficientnet block + depth_coefficient: float, denotes the scaling coefficient of network + depth + + Returns: + int, rounded repeats + """ + return int(math.ceil(depth_coefficient * repeats)) + + +def apply_efficientnet_block( + inputs, + filters_in=32, + filters_out=16, + kernel_size=3, + strides=1, + activation="swish", + expand_ratio=1, + se_ratio=0.0, + dropout_rate=0.0, + name="", +): + """An inverted residual block. + + Args: + inputs: Tensor, The input tensor of the block + filters_in: integer, the number of input filters. + filters_out: integer, the number of output filters. + kernel_size: integer, the dimension of the convolution window. + strides: integer, the stride of the convolution. + activation: activation function to use between each convolutional layer. + expand_ratio: integer, scaling coefficient for the input filters. + se_ratio: float between 0 and 1, fraction to squeeze the input filters. + dropout_rate: float between 0 and 1, fraction of the input units to drop. + name: string, block label. + + Returns: + output tensor for the block. + """ # noqa: E501 + filters = filters_in * expand_ratio + if expand_ratio != 1: + x = keras.layers.Conv2D( + filters=filters, + kernel_size=1, + strides=1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "expand_conv", + )(inputs) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "expand_bn", + )(x) + x = keras.layers.Activation( + activation, name=name + "expand_activation" + )(x) + else: + x = inputs + + # Depthwise Convolution + if strides == 2: + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, kernel_size), + name=name + "dwconv_pad", + )(x) + conv_pad = "valid" + else: + conv_pad = "same" + + x = keras.layers.DepthwiseConv2D( + kernel_size=kernel_size, + strides=strides, + padding=conv_pad, + use_bias=False, + depthwise_initializer=conv_kernel_initializer(), + name=name + "dwconv", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "dwconv_bn", + )(x) + x = keras.layers.Activation(activation, name=name + "dwconv_activation")(x) + + # Squeeze and Excitation phase + if 0 < se_ratio <= 1: + filters_se = max(1, int(filters_in * se_ratio)) + se = keras.layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x) + se_shape = (1, 1, filters) + se = keras.layers.Reshape(se_shape, name=name + "se_reshape")(se) + se = keras.layers.Conv2D( + filters_se, + 1, + padding="same", + activation=activation, + kernel_initializer=conv_kernel_initializer(), + name=name + "se_reduce", + )(se) + se = keras.layers.Conv2D( + filters, + 1, + padding="same", + activation="sigmoid", + kernel_initializer=conv_kernel_initializer(), + name=name + "se_expand", + )(se) + x = keras.layers.multiply([x, se], name=name + "se_excite") + + # Output phase + x = keras.layers.Conv2D( + filters=filters_out, + 
kernel_size=1, + strides=1, + padding="same", + use_bias=False, + kernel_initializer=conv_kernel_initializer(), + name=name + "project", + )(x) + x = keras.layers.BatchNormalization( + axis=3, + name=name + "project_bn", + )(x) + x = keras.layers.Activation(activation, name=name + "project_activation")(x) + + if strides == 1 and filters_in == filters_out: + if dropout_rate > 0: + x = keras.layers.Dropout( + dropout_rate, + noise_shape=(None, 1, 1, 1), + name=name + "drop", + )(x) + x = keras.layers.add([x, inputs], name=name + "add") + + return x diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py new file mode 100644 index 0000000000..a2aac81d26 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets.py @@ -0,0 +1,337 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""EfficientNetV1 model preset configurations.""" + +backbone_presets_no_weights = { + "efficientnetv1_b0": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.0`." + ), + "params": 4050716, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b1": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.0` and `depth_coefficient=1.1`." 
+ ), + "params": 6576704, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "dropout_rate": 0.2, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b2": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.1` and `depth_coefficient=1.2`." + ), + "params": 7770034, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.1, + "depth_coefficient": 1.2, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b3": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.2` and `depth_coefficient=1.4`." + ), + "params": 10785960, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "dropout_rate": 0.3, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b4": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.4` and `depth_coefficient=1.8`." 
+ ), + "params": 17676984, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.4, + "depth_coefficient": 1.8, + "dropout_rate": 0.4, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b5": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.6` and `depth_coefficient=2.2`." + ), + "params": 28517360, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.6, + "depth_coefficient": 2.2, + "dropout_rate": 0.4, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b6": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=1.8` and `depth_coefficient=2.6`." + ), + "params": 40965800, + "official_name": "EfficientNetV1", + "path": "efficientnetv1", + }, + "class_name": "keras_cv.models>EfficientNetV1Backbone", + "config": { + "width_coefficient": 1.8, + "depth_coefficient": 2.6, + "dropout_rate": 0.5, + "drop_connect_rate": 0.2, + "depth_divisor": 8, + "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3], + "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1], + "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192], + "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], + "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], + "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], + "stackwise_squeeze_and_excite_ratios": [ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "include_rescaling": True, + "input_shape": (None, None, 3), + "input_tensor": None, + "activation": "swish", + }, + }, + "efficientnetv1_b7": { + "metadata": { + "description": ( + "EfficientNet B-style architecture with 7 " + "convolutional blocks. This B-style model has " + "`width_coefficient=2.0` and `depth_coefficient=3.1`." 
+            ),
+            "params": 64105488,
+            "official_name": "EfficientNetV1",
+            "path": "efficientnetv1",
+        },
+        "class_name": "keras_cv.models>EfficientNetV1Backbone",
+        "config": {
+            "width_coefficient": 2.0,
+            "depth_coefficient": 3.1,
+            "dropout_rate": 0.5,
+            "drop_connect_rate": 0.2,
+            "depth_divisor": 8,
+            "stackwise_kernel_sizes": [3, 3, 5, 3, 5, 5, 3],
+            "stackwise_num_repeats": [1, 2, 2, 3, 3, 4, 1],
+            "stackwise_input_filters": [32, 16, 24, 40, 80, 112, 192],
+            "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320],
+            "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6],
+            "stackwise_strides": [1, 2, 2, 2, 1, 2, 1],
+            "stackwise_squeeze_and_excite_ratios": [
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+            ],
+            "include_rescaling": True,
+            "input_shape": (None, None, 3),
+            "input_tensor": None,
+            "activation": "swish",
+        },
+    },
+}
+
+backbone_presets_with_weights = {}
+
+backbone_presets = {
+    **backbone_presets_no_weights,
+    **backbone_presets_with_weights,
+}
diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
new file mode 100644
index 0000000000..fd73068311
--- /dev/null
+++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py
@@ -0,0 +1,60 @@
+# Copyright 2023 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+from absl.testing import parameterized
+
+from keras_cv.backend import keras
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import (
+    EfficientNetV1B0Backbone,
+)
+from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import (
+    EfficientNetV1Backbone,
+)
+from keras_cv.tests.test_case import TestCase
+from keras_cv.utils.train import get_feature_extractor
+
+
+@pytest.mark.extra_large
+class EfficientNetV1PresetFullTest(TestCase):
+    """
+    Test the full enumeration of our presets.
+    This tests every preset for EfficientNetV1 and is only run manually.
+ Run with: + `pytest keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_presets_test.py --run_extra_large` + """ # noqa: E501 + + @parameterized.named_parameters( + *[(preset, preset) for preset in EfficientNetV1Backbone.presets] + ) + def test_load_efficientnet(self, preset): + input_data = np.ones(shape=(2, 224, 224, 3)) + model = EfficientNetV1Backbone.from_preset(preset) + model(input_data) + + def test_efficientnet_feature_extractor(self): + model = EfficientNetV1B0Backbone( + include_rescaling=False, + input_shape=[256, 256, 3], + ) + levels = ["P3", "P4"] + layer_names = [model.pyramid_level_inputs[level] for level in levels] + backbone_model = get_feature_extractor(model, layer_names, levels) + inputs = keras.Input(shape=[256, 256, 3]) + outputs = backbone_model(inputs) + self.assertLen(outputs, 2) + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals(outputs["P3"].shape[:3], (None, 32, 32)) + self.assertEquals(outputs["P4"].shape[:3], (None, 16, 16)) diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py new file mode 100644 index 0000000000..ac8a8dfa81 --- /dev/null +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone_test.py @@ -0,0 +1,198 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_aliases import ( + EfficientNetV1B0Backbone, +) +from keras_cv.models.backbones.efficientnet_v1.efficientnet_v1_backbone import ( + EfficientNetV1Backbone, +) +from keras_cv.tests.test_case import TestCase +from keras_cv.utils.train import get_feature_extractor + + +class EfficientNetV1BackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(8, 224, 224, 3)) + + def test_valid_call(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=False, + ) + model(self.input_batch) + + def test_valid_call_alias_model_with_rescaling(self): + model = EfficientNetV1B0Backbone(include_rescaling=True) + model(self.input_batch) + + def test_valid_call_with_rescaling(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_v1_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, EfficientNetV1Backbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_alias_model(self): + model = EfficientNetV1B0Backbone() + model_output = model(self.input_batch) + save_path = os.path.join( + self.get_temp_dir(), "efficientnet_v1_backbone.keras" + ) + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + # Note that these aliases serialized as the base class + self.assertIsInstance(restored_model, EfficientNetV1Backbone) + + # Check that output matches. 
+ restored_output = restored_model(self.input_batch) + self.assertAllClose(model_output, restored_output) + + def test_feature_pyramid_inputs(self): + model = EfficientNetV1B0Backbone() + backbone_model = get_feature_extractor( + model, + model.pyramid_level_inputs.values(), + model.pyramid_level_inputs.keys(), + ) + input_size = 256 + inputs = keras.Input(shape=[input_size, input_size, 3]) + outputs = backbone_model(inputs) + levels = ["P1", "P2", "P3", "P4", "P5"] + self.assertEquals(list(outputs.keys()), levels) + self.assertEquals( + outputs["P1"].shape, + (None, input_size // 2**1, input_size // 2**1, 16), + ) + self.assertEquals( + outputs["P2"].shape, + (None, input_size // 2**2, input_size // 2**2, 24), + ) + self.assertEquals( + outputs["P3"].shape, + (None, input_size // 2**3, input_size // 2**3, 40), + ) + self.assertEquals( + outputs["P4"].shape, + (None, input_size // 2**4, input_size // 2**4, 112), + ) + self.assertEquals( + outputs["P5"].shape, + (None, input_size // 2**5, input_size // 2**5, 1280), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = EfficientNetV1Backbone( + stackwise_kernel_sizes=[3, 3, 5, 3, 5, 5, 3], + stackwise_num_repeats=[1, 2, 2, 3, 3, 4, 1], + stackwise_input_filters=[32, 16, 24, 40, 80, 112, 192], + stackwise_output_filters=[16, 24, 40, 80, 112, 192, 320], + stackwise_expansion_ratios=[1, 6, 6, 6, 6, 6, 6], + stackwise_strides=[1, 2, 2, 2, 1, 2, 1], + stackwise_squeeze_and_excite_ratios=[ + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + width_coefficient=1.0, + depth_coefficient=1.0, + include_rescaling=True, + ) + self.assertEqual(model.output_shape, (None, None, None, 1280)) diff --git a/keras_cv/models/legacy/__init__.py b/keras_cv/models/legacy/__init__.py index 20df5826f0..419ae34b31 100644 --- a/keras_cv/models/legacy/__init__.py +++ b/keras_cv/models/legacy/__init__.py @@ -24,14 +24,6 @@ from keras_cv.models.legacy.convnext import ConvNeXtXLarge from keras_cv.models.legacy.darknet import DarkNet21 from keras_cv.models.legacy.darknet import DarkNet53 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB0 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB1 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB2 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB3 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB4 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB5 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB6 -from keras_cv.models.legacy.efficientnet_v1 import EfficientNetB7 from keras_cv.models.legacy.mlp_mixer import MLPMixerB16 from keras_cv.models.legacy.mlp_mixer import MLPMixerB32 from keras_cv.models.legacy.mlp_mixer import MLPMixerL16 diff --git a/keras_cv/models/legacy/efficientnet_v1.py b/keras_cv/models/legacy/efficientnet_v1.py deleted file mode 100644 index b91a63697a..0000000000 --- a/keras_cv/models/legacy/efficientnet_v1.py +++ /dev/null @@ -1,937 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""EfficientNet models for Keras. - -Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - [Based on the original keras.applications EfficientNet](https://github.com/keras-team/keras/blob/master/keras/applications/efficientnet.py) -""" # noqa: E501 - -import copy -import math - -import tensorflow as tf -from tensorflow import keras -from tensorflow.keras import backend -from tensorflow.keras import layers - -from keras_cv.models.legacy import utils -from keras_cv.models.legacy.weights import parse_weights - -DEFAULT_BLOCKS_ARGS = [ - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 32, - "filters_out": 16, - "expand_ratio": 1, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 2, - "filters_in": 16, - "filters_out": 24, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 2, - "filters_in": 24, - "filters_out": 40, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 3, - "filters_in": 40, - "filters_out": 80, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 3, - "filters_in": 80, - "filters_out": 112, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, - { - "kernel_size": 5, - "repeats": 4, - "filters_in": 112, - "filters_out": 192, - "expand_ratio": 6, - "id_skip": True, - "strides": 2, - "se_ratio": 0.25, - }, - { - "kernel_size": 3, - "repeats": 1, - "filters_in": 192, - "filters_out": 320, - "expand_ratio": 6, - "id_skip": True, - "strides": 1, - "se_ratio": 0.25, - }, -] - -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - -DENSE_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 1.0 / 3.0, - "mode": "fan_out", - "distribution": "uniform", - }, -} - -BASE_DOCSTRING = """Instantiates the {name} architecture. - - Reference: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) - (ICML 2019) - - This class represents a Keras image classification model. - - For image classification use cases, see - [this page for detailed examples](https://keras.io/api/applications/#usage-examples-for-image-classification-models). - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). - - Args: - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: bool, Whether to include the fully-connected layer at the - top of the network. - weights: One of `None` (random initialization), or the path to the - weights file to be loaded. - input_shape: tuple, Optional shape tuple. It should have exactly 3 - inputs channels. 
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. - pooling: Optional pooling mode for feature extraction when `include_top` - is `False`, defaults to None. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - - `avg` means that global average pooling will be applied to the - output of the last convolutional layer, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - num_classes: int, Optional number of classes to classify images into, - only to be specified if `include_top` is True, and if no `weights` - argument is specified, defaults to None. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. Defaults to 'softmax'. When loading pretrained weights, - `classifier_activation` can only be `None` or `"softmax"`. - - Returns: - A `keras.Model` instance. -""" # noqa: E501 - -BN_AXIS = 3 - - -def correct_pad(inputs, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling. - Args: - inputs: Input tensor. - kernel_size: An integer or tuple/list of 2 integers. - Returns: - A tuple. - """ - img_dim = 1 - input_size = backend.int_shape(inputs)[img_dim : (img_dim + 2)] - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if input_size[0] is None: - adjust = (1, 1) - else: - adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) - correct = (kernel_size[0] // 2, kernel_size[1] // 2) - return ( - (correct[0] - adjust[0], correct[0]), - (correct[1] - adjust[1], correct[1]), - ) - - -def apply_conv_bn( - x, - conv_type, - filters, - kernel_size, - strides=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation="swish", - name="", -): - """ - Represents Convolutional Block with optional Batch Normalization layer and - activation layer - - Args: - x: Tensor - conv_type: str, Type of Conv layer to be used in block. - - 'normal': The Conv2D layer will be used. - - 'depth': The DepthWiseConv2D layer will be used. - filters: int, The filter size of the Conv layer. It should be `None` - when `conv_type` is set as `depth` - kernel_size: int (or) tuple, The kernel size of the Conv layer. - strides: int (or) tuple, The stride value of Conv layer. - padding: str (or) callable, The type of padding for Conv layer. - use_bias: bool, Boolean to use bias for Conv layer. - kernel_initializer: dict (or) str (or) callable, The kernel initializer - for Conv layer. - bn_norm: bool, Boolean to add BatchNormalization layer after Conv layer. - activation: str (or) callable, Activation to be applied on the output at - the end. - name: str, name of the block - - Returns: - tf.Tensor - """ - if conv_type == "normal": - if filters is None or kernel_size is None: - raise ValueError( - "The filter size and kernel size should be set for Conv2D " - "layer." - ) - x = layers.Conv2D( - filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - name=name + "_conv", - )(x) - elif conv_type == "depth": - if filters is not None: - raise ValueError( - "Filter size shouldn't be set for DepthWiseConv2D layer." 
- ) - if kernel_size is None or strides is None: - raise ValueError( - "The kernel size and strides should be set for DepthWiseConv2D " - "layer." - ) - x = layers.DepthwiseConv2D( - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - depthwise_initializer=kernel_initializer, - name=name + "_dwconv", - )(x) - else: - raise ValueError( - "The 'conv_type' parameter should be set either to 'normal' or " - "'depth'" - ) - - if bn_norm: - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "_bn")(x) - if activation is not None: - x = layers.Activation(activation, name=name + "_activation")(x) - - return x - - -def apply_efficientnet_block( - inputs, - filters_in=32, - filters_out=16, - kernel_size=3, - strides=1, - activation="swish", - expand_ratio=1, - se_ratio=0.0, - id_skip=True, - drop_rate=0.0, - name="", -): - """An inverted residual block. - - Args: - inputs: Tensor, The input tensor of the block - filters_in: integer, the number of input filters. - filters_out: integer, the number of output filters. - kernel_size: integer, the dimension of the convolution window. - strides: integer, the stride of the convolution. - activation: activation function. - expand_ratio: integer, scaling coefficient for the input filters. - se_ratio: float between 0 and 1, fraction to squeeze the input filters. - id_skip: boolean. - drop_rate: float between 0 and 1, fraction of the input units to drop. - name: string, block label. - - Returns: - tf.Tensor - """ - filters = filters_in * expand_ratio - if expand_ratio != 1: - x = apply_conv_bn( - x=inputs, - conv_type="normal", - filters=filters, - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name=name + "_expand", - ) - else: - x = inputs - - # Depthwise Convolution - if strides == 2: - x = layers.ZeroPadding2D( - padding=correct_pad(x, kernel_size), - name=name + "_dwconv_pad", - )(x) - conv_pad = "valid" - else: - conv_pad = "same" - - x = apply_conv_bn( - x=x, - conv_type="depth", - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=conv_pad, - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name=name, - ) - - # Squeeze and Excitation phase - if 0 < se_ratio <= 1: - filters_se = max(1, int(filters_in * se_ratio)) - se = layers.GlobalAveragePooling2D(name=name + "_se_squeeze")(x) - if BN_AXIS == 1: - se_shape = (filters, 1, 1) - else: - se_shape = (1, 1, filters) - se = layers.Reshape(se_shape, name=name + "_se_reshape")(se) - se = layers.Conv2D( - filters_se, - 1, - padding="same", - activation=activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "_se_reduce", - )(se) - se = layers.Conv2D( - filters, - 1, - padding="same", - activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, - name=name + "_se_expand", - )(se) - x = layers.multiply([x, se], name=name + "_se_excite") - - # Output phase - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=filters_out, - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=None, - name=name + "_project", - ) - - if id_skip and strides == 1 and filters_in == filters_out: - if drop_rate > 0: - x = layers.Dropout( - drop_rate, - noise_shape=(None, 1, 1, 1), - name=name + "_drop", - )(x) - x = layers.add([x, inputs], name=name + "_add") - - return x - - -@keras.utils.register_keras_serializable(package="keras_cv.models") 
-class EfficientNet(keras.Model): - """This class represents a Keras EfficientNet architecture. - Args: - include_rescaling: bool, whether to rescale the inputs. If set to - True, inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: bool, whether to include the fully-connected layer at the - top of the network. - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - default_size: integer, default input image size. - dropout_rate: float, dropout rate before final classifier layer. - drop_connect_rate: float, dropout rate at skip connections. - depth_divisor: integer, a unit of network width. - activation: activation function. - blocks_args: list of dicts, parameters to construct block modules. - model_name: string, model name. - weights: one of `None` (random initialization), or the path to the - weights file to be loaded. - input_shape: optional shape tuple, it should have exactly 3 input - channels. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to - use as image input for the model. - pooling: optional pooling mode for feature extraction when `include_top` - is `False`. - - `None` means that the output of the model will be the 4D tensor - output of the last convolutional layer. - - `avg` means that global average pooling will be applied to the - output of the last convolutional layer, and thus the output of - the model will be a 2D tensor. - - `max` means that global max pooling will be applied. - num_classes: optional number of classes to classify images into, - only to be specified if `include_top` is True, and if no `weights` - argument is specified. - classifier_activation: A `str` or callable. The activation function to - use on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" - layer. - Returns: - A `keras.Model` instance. - Raises: - ValueError: in case of invalid argument for `weights`, or invalid input - shape. - ValueError: if `classifier_activation` is not `softmax` or `None` when - using a pretrained top layer. - """ - - def __init__( - self, - include_rescaling, - include_top, - width_coefficient, - depth_coefficient, - default_size, - dropout_rate=0.2, - drop_connect_rate=0.2, - depth_divisor=8, - activation="swish", - blocks_args="default", - model_name="efficientnet", - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - num_classes=None, - classifier_activation="softmax", - **kwargs, - ): - blocks_args_type = blocks_args - - if blocks_args == "default": - blocks_args = DEFAULT_BLOCKS_ARGS - - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to " - "the weights file to be loaded. Weights file not found at " - f"location: {weights}" - ) - - if include_top and not num_classes: - raise ValueError( - "If `include_top` is True, you should specify `num_classes`. " - f"Received: num_classes={num_classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. 
" - ) - - img_input = utils.parse_model_inputs(input_shape, input_tensor) - - # Build stem - x = img_input - - if include_rescaling: - # Use common rescaling strategy across keras_cv - x = layers.Rescaling(1.0 / 255.0)(x) - - x = layers.ZeroPadding2D( - padding=correct_pad(x, 3), name="stem_conv_pad" - )(x) - - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=EfficientNet.round_filters( - 32, width_coefficient, depth_divisor - ), - kernel_size=3, - strides=2, - padding="valid", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name="stem", - ) - - # Build blocks - blocks_args = copy.deepcopy(blocks_args) - - b = 0 - blocks = float( - sum( - EfficientNet.round_repeats(args["repeats"], depth_coefficient) - for args in blocks_args - ) - ) - for i, args in enumerate(blocks_args): - assert args["repeats"] > 0 - # Update block input and output filters based on depth multiplier. - args["filters_in"] = EfficientNet.round_filters( - args["filters_in"], width_coefficient, depth_divisor - ) - args["filters_out"] = EfficientNet.round_filters( - args["filters_out"], width_coefficient, depth_divisor - ) - - for j in range( - EfficientNet.round_repeats( - args.pop("repeats"), depth_coefficient - ) - ): - # The first block needs to take care of stride and filter size - # increase. - if j > 0: - args["strides"] = 1 - args["filters_in"] = args["filters_out"] - x = apply_efficientnet_block( - inputs=x, - activation=activation, - drop_rate=drop_connect_rate * b / blocks, - name="block{}{}".format(i + 1, chr(j + 97)), - **args, - ) - b += 1 - - # Build top - x = apply_conv_bn( - x=x, - conv_type="normal", - filters=self.round_filters(1280, width_coefficient, depth_divisor), - kernel_size=1, - padding="same", - use_bias=False, - kernel_initializer=CONV_KERNEL_INITIALIZER, - bn_norm=True, - activation=activation, - name="top", - ) - - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - if dropout_rate > 0: - x = layers.Dropout(dropout_rate, name="top_dropout")(x) - x = layers.Dense( - num_classes, - activation=classifier_activation, - kernel_initializer=DENSE_KERNEL_INITIALIZER, - name="predictions", - )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - inputs = img_input - - # Create model. - super().__init__(inputs=inputs, outputs=x, name=model_name, **kwargs) - - # Load weights. - if weights is not None: - self.load_weights(weights) - - self.include_rescaling = include_rescaling - self.include_top = include_top - self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient - self.default_size = default_size - self.dropout_rate = dropout_rate - self.drop_connect_rate = drop_connect_rate - self.depth_divisor = depth_divisor - self.activation = activation - self.blocks_args = blocks_args_type - self.input_tensor = input_tensor - self.pooling = pooling - self.num_classes = num_classes - self.classifier_activation = classifier_activation - - @staticmethod - def round_filters(filters, width_coefficient, divisor): - """Round number of filters based on depth multiplier. 
- Args: - filters: int, number of filters for Conv layer - width_coefficient: float, denotes the scaling coefficient of network - width - divisor: int, a unit of network width - - Returns: - int, new rounded filters value for Conv layer - """ - filters *= width_coefficient - new_filters = max( - divisor, int(filters + divisor / 2) // divisor * divisor - ) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += divisor - return int(new_filters) - - @staticmethod - def round_repeats(repeats, depth_coefficient): - """Round number of repeats based on depth multiplier. - Args: - repeats: int, number of repeats of efficientnet block - depth_coefficient: float, denotes the scaling coefficient of network - depth - - Returns: - int, rounded repeats - """ - return int(math.ceil(depth_coefficient * repeats)) - - def get_config(self): - return { - "include_rescaling": self.include_rescaling, - "include_top": self.include_top, - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, - "default_size": self.default_size, - "dropout_rate": self.dropout_rate, - "drop_connect_rate": self.drop_connect_rate, - "depth_divisor": self.depth_divisor, - "activation": self.activation, - "blocks_args": self.blocks_args, - "input_tensor": self.input_tensor, - "input_shape": self.input_shape[1:], - "model_name": self.name, - "pooling": self.pooling, - "num_classes": self.num_classes, - "classifier_activation": self.classifier_activation, - "trainable": self.trainable, - } - - @classmethod - def from_config(cls, config): - return cls(**config) - - -def EfficientNetB0( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb0", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.0, - default_size=224, - dropout_rate=0.2, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb0"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB1( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb1", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.0, - depth_coefficient=1.1, - default_size=240, - dropout_rate=0.2, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb1"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB2( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb2", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.1, - depth_coefficient=1.2, - default_size=260, - dropout_rate=0.3, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb2"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - 
**kwargs, - ) - - -def EfficientNetB3( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb3", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.2, - depth_coefficient=1.4, - default_size=300, - dropout_rate=0.3, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb3"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB4( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb4", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.4, - depth_coefficient=1.8, - default_size=380, - dropout_rate=0.4, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb4"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB5( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb5", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.6, - depth_coefficient=2.2, - default_size=456, - dropout_rate=0.4, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb5"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB6( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb6", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=1.8, - depth_coefficient=2.6, - default_size=528, - dropout_rate=0.5, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb6"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -def EfficientNetB7( - *, - include_rescaling, - include_top, - num_classes=None, - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classifier_activation="softmax", - name="efficientnetb7", - **kwargs, -): - return EfficientNet( - include_rescaling, - include_top, - width_coefficient=2.0, - depth_coefficient=3.1, - default_size=600, - dropout_rate=0.5, - model_name=name, - weights=parse_weights(weights, include_top, "efficientnetb7"), - input_shape=input_shape, - input_tensor=input_tensor, - pooling=pooling, - num_classes=num_classes, - classifier_activation=classifier_activation, - **kwargs, - ) - - -EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB0") -EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB1") -EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB2") -EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB3") -EfficientNetB4.__doc__ = 
BASE_DOCSTRING.format(name="EfficientNetB4") -EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB5") -EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB6") -EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB7") diff --git a/keras_cv/models/legacy/efficientnet_v1_test.py b/keras_cv/models/legacy/efficientnet_v1_test.py deleted file mode 100644 index 7615075618..0000000000 --- a/keras_cv/models/legacy/efficientnet_v1_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from absl.testing import parameterized - -from keras_cv.models.legacy import efficientnet_v1 -from keras_cv.tests.test_case import TestCase - -from .models_test import ModelsTest - -MODEL_LIST = [ - (efficientnet_v1.EfficientNetB0, 1280, {}), -] - -""" -Below are other configurations that we omit from our CI but that can/should -be tested manually when making changes to this model. -(efficientnet_v1.EfficientNetB1, 1280, {}), -(efficientnet_v1.EfficientNetB2, 1408, {}), -(efficientnet_v1.EfficientNetB3, 1536, {}), -(efficientnet_v1.EfficientNetB4, 1792, {}), -(efficientnet_v1.EfficientNetB5, 2048, {}), -(efficientnet_v1.EfficientNetB6, 2304, {}), -(efficientnet_v1.EfficientNetB7, 2560, {}), -""" - - -class EfficientNetV1Test(ModelsTest, TestCase): - @parameterized.parameters(*MODEL_LIST) - def test_application_base(self, app, _, args): - super()._test_application_base(app, _, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_with_rescaling(self, app, last_dim, args): - super()._test_application_with_rescaling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_pooling(self, app, last_dim, args): - super()._test_application_pooling(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_application_variable_input_channels(self, app, last_dim, args): - super()._test_application_variable_input_channels(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) From 118f502abd38d5b520c0054972e2a2081915d871 Mon Sep 17 00:00:00 2001 From: Bhavesh Misra Date: Thu, 17 Aug 2023 22:46:24 +0530 Subject: [PATCH 07/17] Issue_1957 Returning the Matplotlib plt object in the plot_bounding_box_gallery.py file (#2000) * Updating_the plot_bounding_box_gallery.py * Tried_plot_bounding_box_gallery * Tried_plot_bounding_box_gallery * Trying_passing_classmapping * returning_plt_object_done * returning_plt_object_done * Done_hopefully * Done_Hopefully_fnal_2 * linting * Revert "linting" This reverts commit 64e7e2ab376b8b3505b9c961bf55259b033174c7. 
I made a mistake lol * Linting * Linting_Donee * Conditional_Removed --- examples/visualization/plot_image_gallery.py | 2 +- keras_cv/visualization/plot_bounding_box_gallery.py | 2 +- keras_cv/visualization/plot_image_gallery.py | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/visualization/plot_image_gallery.py b/examples/visualization/plot_image_gallery.py index 17197aca4d..be8131643c 100644 --- a/examples/visualization/plot_image_gallery.py +++ b/examples/visualization/plot_image_gallery.py @@ -5,7 +5,7 @@ Date created: 2022/10/16 Last modified: 2022/06/24 Description: Visualize ground truth and predicted bounding boxes for a given - dataset. + dataset. """ """ diff --git a/keras_cv/visualization/plot_bounding_box_gallery.py b/keras_cv/visualization/plot_bounding_box_gallery.py index 1f6bd5cf64..73112458bd 100644 --- a/keras_cv/visualization/plot_bounding_box_gallery.py +++ b/keras_cv/visualization/plot_bounding_box_gallery.py @@ -174,7 +174,7 @@ def unpackage_tfds_inputs(inputs): ), ] - plot_image_gallery( + return plot_image_gallery( plotted_images, value_range, legend_handles=legend_handles, diff --git a/keras_cv/visualization/plot_image_gallery.py b/keras_cv/visualization/plot_image_gallery.py index 1d98c20f53..05cbbad796 100644 --- a/keras_cv/visualization/plot_image_gallery.py +++ b/keras_cv/visualization/plot_image_gallery.py @@ -117,9 +117,6 @@ def plot_image_gallery( """ assert_matplotlib_installed("plot_bounding_box_gallery") - if path is None and show is None: - # Default to showing the image - show = True if path is not None and show: raise ValueError( "plot_gallery() expects either `path` to be set, or `show` " @@ -178,8 +175,9 @@ def plot_image_gallery( current_axis.margins(x=0, y=0) current_axis.axis("off") - if path is None and not show: - return + if path is None and show is None: + return fig + if path is not None: plt.savefig( fname=path, From 38381bad0496129afdf22a6b9f79ce5651b38bc5 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Fri, 18 Aug 2023 02:04:51 +0530 Subject: [PATCH 08/17] [EfficientNetV2 Backbone] Style fix (#2031) * preset+style fix * fix --- .../efficientnet_v2_aliases.py | 99 ++++++++----------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py index 6489d5b65b..f338874982 100644 --- a/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py +++ b/keras_cv/models/backbones/efficientnet_v2/efficientnet_v2_aliases.py @@ -20,12 +20,9 @@ from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501 backbone_presets, ) -from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_backbone_presets import ( # noqa: E501 - backbone_presets_with_weights, -) from keras_cv.utils.python_utils import classproperty -ALIAS_BASE_DOCSTRING = """Instantiates the {name} architecture. +ALIAS_DOCSTRING = """Instantiates the {name} architecture. 
Reference: - [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298) @@ -64,8 +61,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_s": copy.deepcopy( - backbone_presets["efficientnetv2_s"] + "efficientnetv2_s_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_s_imagenet"] ), } @@ -73,11 +70,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_s_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_s_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2MBackbone") @@ -102,11 +95,7 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return { - "efficientnetv2_m": copy.deepcopy( - backbone_presets["efficientnetv2_m"] - ), - } + return {} @classproperty def presets_with_weights(cls): @@ -137,11 +126,7 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return { - "efficientnetv2_l": copy.deepcopy( - backbone_presets["efficientnetv2_l"] - ), - } + return {} @classproperty def presets_with_weights(cls): @@ -173,8 +158,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b0": copy.deepcopy( - backbone_presets["efficientnetv2_b0"] + "efficientnetv2_b0_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b0_imagenet"] ), } @@ -182,11 +167,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b0_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b0_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B1Backbone") @@ -212,8 +193,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b1": copy.deepcopy( - backbone_presets["efficientnetv2_b1"] + "efficientnetv2_b1_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b1_imagenet"] ), } @@ -221,11 +202,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b1_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b1_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B2Backbone") @@ -251,8 +228,8 @@ def __new__( def presets(cls): """Dictionary of preset names and configurations.""" return { - "efficientnetv2_b2": copy.deepcopy( - backbone_presets["efficientnetv2_b2"] + "efficientnetv2_b2_imagenet": copy.deepcopy( + backbone_presets["efficientnetv2_b2_imagenet"] ), } @@ -260,11 +237,7 @@ def presets(cls): def presets_with_weights(cls): """Dictionary of preset names and configurations that include weights.""" - return { - "efficientnetv2_b2_imagenet": copy.deepcopy( - backbone_presets_with_weights["efficientnetv2_b2_imagenet"] - ), - } + return cls.presets @keras_cv_export("keras_cv.models.EfficientNetV2B3Backbone") @@ -298,24 +271,38 @@ def presets_with_weights(cls): return {} -EfficientNetV2B0Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B0" +setattr( + EfficientNetV2SBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2S"), ) -EfficientNetV2B1Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - 
name="EfficientNetV2B1" +setattr( + EfficientNetV2MBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2M"), ) -EfficientNetV2B2Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B2" +setattr( + EfficientNetV2LBackbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2L"), ) -EfficientNetV2B3Backbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2B3" +setattr( + EfficientNetV2B0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B0"), ) -EfficientNetV2SBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2S" +setattr( + EfficientNetV2B1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B1"), ) -EfficientNetV2MBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2M" +setattr( + EfficientNetV2B2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B2"), ) -EfficientNetV2LBackbone.__doc__ = ALIAS_BASE_DOCSTRING.format( - name="EfficientNetV2L" +setattr( + EfficientNetV2B3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="EfficientNetV2B3"), ) From 5373b916d15544a6763347575d440570ec617495 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Fri, 18 Aug 2023 13:30:37 -0600 Subject: [PATCH 09/17] Add pre-trained MobileNetV3Small preset (#2034) --- .../mobilenet_v3_backbone_presets.py | 17 +++++++++++++++++ .../mobilenet_v3_backbone_presets_test.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py index f7cb46171c..8f350fd7b5 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py @@ -200,6 +200,23 @@ "weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_large_imagenet_backbone.h5", # noqa: E501 "weights_hash": "ec55ea2f4f4ee9a2ddf3ee8e2dd784e9d5732690c1fc5afc7e1b2a66703f3337", # noqa: E501 }, + "mobilenet_v3_small_imagenet": { + "metadata": { + "description": ( + "MobileNetV3 model with 28 layers where the batch " + "normalization and hard-swish activation are applied after the " + "convolution layers. " + "Pre-trained on the ImageNet 2012 classification task." 
+ ), + "params": 2_994_518, + "official_name": "MobileNetV3", + "path": "mobilenetv3", + }, + "class_name": "keras_cv>MobileNetV3Backbone", + "config": backbone_presets_no_weights["mobilenet_v3_small"]["config"], + "weights_url": "https://storage.googleapis.com/keras-cv/models/mobilenetv3/mobilenetv3_small_imagenet_backbone.h5", # noqa: E501 + "weights_hash": "592c2707edfc6c673a3b2d9aaf76dee678557f4a32d573c74f96c8122effa503", # noqa: E501 + }, } backbone_presets = { diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py index 2d36e60166..aa0c806aab 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets_test.py @@ -36,7 +36,7 @@ def setUp(self): self.input_batch = np.ones(shape=(8, 224, 224, 3)) def test_backbone_output(self): - model = MobileNetV3Backbone.from_preset("mobilenet_v3_large_imagenet") + model = MobileNetV3Backbone.from_preset("mobilenet_v3_small_imagenet") outputs = model(self.input_batch) # The forward pass from a preset should be stable! @@ -45,7 +45,7 @@ def test_backbone_output(self): # We should only update these numbers if we are updating a weights # file, or have found a discrepancy with the upstream source. outputs = outputs[0, 0, 0, :5] - expected = [0.27, 0.01, 0.29, 0.08, -0.12] + expected = [0.25, 1.13, -0.26, 0.10, 0.03] # Keep a high tolerance, so we are robust to different hardware. self.assertAllClose( ops.convert_to_numpy(outputs), expected, atol=0.01, rtol=0.01 From 30bbd60ac455d0415814815d4ce4e0cb5cbdee94 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Mon, 21 Aug 2023 22:02:06 +0530 Subject: [PATCH 10/17] alias fix + doc fix in preset (#2035) --- .../models/backbones/mobilenet_v3/mobilenet_v3_aliases.py | 8 ++++++-- .../mobilenet_v3/mobilenet_v3_backbone_presets.py | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py index 9d9c91e8e0..da19b81f28 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_aliases.py @@ -73,12 +73,16 @@ def __new__( @classproperty def presets(cls): """Dictionary of preset names and configurations.""" - return {} + return { + "mobilenet_v3_small_imagenet": copy.deepcopy( + backbone_presets["mobilenet_v3_small_imagenet"] + ), + } @classproperty def presets_with_weights(cls): """Dictionary of preset names and configurations.""" - return {} + return cls.presets @keras_cv_export("keras_cv.models.MobileNetV3LargeBackbone") diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py index 8f350fd7b5..75ad436fe1 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone_presets.py @@ -191,7 +191,7 @@ "convolution layers. " "Pre-trained on the ImageNet 2012 classification task." 
), - "params": 2_994_518, + "params": 2994518, "official_name": "MobileNetV3", "path": "mobilenetv3", }, @@ -203,12 +203,12 @@ "mobilenet_v3_small_imagenet": { "metadata": { "description": ( - "MobileNetV3 model with 28 layers where the batch " + "MobileNetV3 model with 14 layers where the batch " "normalization and hard-swish activation are applied after the " "convolution layers. " "Pre-trained on the ImageNet 2012 classification task." ), - "params": 2_994_518, + "params": 933502, "official_name": "MobileNetV3", "path": "mobilenetv3", }, From 96ac1ee72ba27ca0e194886d0d9baa3183630200 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Mon, 21 Aug 2023 22:34:56 +0530 Subject: [PATCH 11/17] keras.layers.add->keras.layers.Add (#2033) --- keras_cv/layers/fusedmbconv.py | 2 +- keras_cv/layers/mbconv.py | 2 +- .../backbones/efficientnet_lite/efficientnet_lite_backbone.py | 2 +- .../backbones/efficientnet_v1/efficientnet_v1_backbone.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_cv/layers/fusedmbconv.py b/keras_cv/layers/fusedmbconv.py index 489c619dbb..2ac33a54df 100644 --- a/keras_cv/layers/fusedmbconv.py +++ b/keras_cv/layers/fusedmbconv.py @@ -218,7 +218,7 @@ def call(self, inputs): if self.strides == 1 and self.input_filters == self.output_filters: if self.survival_probability: x = self.dropout(x) - x = keras.layers.add([x, inputs], name=self.name + "add") + x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x def get_config(self): diff --git a/keras_cv/layers/mbconv.py b/keras_cv/layers/mbconv.py index fba5b0194d..34a7e0c8d2 100644 --- a/keras_cv/layers/mbconv.py +++ b/keras_cv/layers/mbconv.py @@ -226,7 +226,7 @@ def call(self, inputs): if self.strides == 1 and self.input_filters == self.output_filters: if self.survival_probability: x = self.dropout(x) - x = keras.layers.add([x, inputs], name=self.name + "add") + x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x def get_config(self): diff --git a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py index d3a6fd8815..fe8eab1a08 100644 --- a/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py +++ b/keras_cv/models/backbones/efficientnet_lite/efficientnet_lite_backbone.py @@ -362,5 +362,5 @@ def apply_efficient_net_lite_block( x = keras.layers.Dropout( dropout_rate, noise_shape=(None, 1, 1, 1), name=name + "drop" )(x) - x = keras.layers.add([x, inputs], name=name + "add") + x = keras.layers.Add(name=name + "add")([x, inputs]) return x diff --git a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py index cc39d3d31c..c90bebf198 100644 --- a/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py +++ b/keras_cv/models/backbones/efficientnet_v1/efficientnet_v1_backbone.py @@ -449,6 +449,6 @@ def apply_efficientnet_block( noise_shape=(None, 1, 1, 1), name=name + "drop", )(x) - x = keras.layers.add([x, inputs], name=name + "add") + x = keras.layers.Add(name=name + "add")([x, inputs]) return x From f3c629451a59519ef7714d5fb1bf936db68a58a3 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Wed, 23 Aug 2023 01:19:01 +0530 Subject: [PATCH 12/17] Added support of segmentation mask in RandomShear Layer (#2021) * seg mask support * format * add test * add demo * fix * update readme * 
review comment * fix --- .../segmentation/random_shear_demo.py | 34 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- keras_cv/layers/preprocessing/random_shear.py | 27 +++++++++++++++ .../layers/preprocessing/random_shear_test.py | 22 +++++++++++- 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 examples/layers/preprocessing/segmentation/random_shear_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_shear_demo.py b/examples/layers/preprocessing/segmentation/random_shear_demo.py new file mode 100644 index 0000000000..1a78a0c8fb --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_shear_demo.py @@ -0,0 +1,34 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_shear_demo.py shows how to use the RandomShear preprocessing layer. + +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomshear = preprocessing.RandomShear(0.5, 0.5) + ds = ds.map(randomshear, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index b3fef9e96c..4f077d8cec 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -37,7 +37,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomRotation | ✅ | ✅ | ✅ | ✅ | | RandomSaturation | ✅ | ✅ | ✅ | ✅ | | RandomSharpness | ✅ | ✅ | ✅ | ✅ | -| RandomShear | ✅ | ❌ | ✅ | ✅ | +| RandomShear | ✅ | ✅ | ✅ | ✅ | | RandomTranslation | ✅ | ✅ | ✅ | ✅ | | RandomZoom | ✅ | ❌ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py index 7c20a9f8d5..dc69288d70 100644 --- a/keras_cv/layers/preprocessing/random_shear.py +++ b/keras_cv/layers/preprocessing/random_shear.py @@ -219,6 +219,33 @@ def _build_shear_y_transform_matrix(shear_y): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + x, y = transformations["shear_x"], transformations["shear_y"] + + if x is not None: + transforms_x = self._build_shear_x_transform_matrix(x) + segmentation_masks = preprocessing.transform( + images=segmentation_masks, + transforms=transforms_x, + interpolation="nearest", + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + + if y is not None: + transforms_y = self._build_shear_y_transform_matrix(y) + segmentation_masks = preprocessing.transform( + images=segmentation_masks, + transforms=transforms_y, + interpolation="nearest", + fill_mode=self.fill_mode, + 
fill_value=self.fill_value, + ) + + return segmentation_masks + def augment_bounding_boxes( self, bounding_boxes, transformations, images=None, **kwargs ): diff --git a/keras_cv/layers/preprocessing/random_shear_test.py b/keras_cv/layers/preprocessing/random_shear_test.py index 180e6a16a7..51933b7f0b 100644 --- a/keras_cv/layers/preprocessing/random_shear_test.py +++ b/keras_cv/layers/preprocessing/random_shear_test.py @@ -27,19 +27,33 @@ def test_aggressive_shear_fills_at_least_some_pixels(self): [2 * tf.ones(img_shape), tf.ones(img_shape)], axis=0, ) + ys_segmentation_masks = tf.stack( + [2 * tf.ones(img_shape), tf.ones(img_shape)], + axis=0, + ) xs = tf.cast(xs, tf.float32) + ys_segmentation_masks = tf.cast(ys_segmentation_masks, tf.float32) fill_value = 0.0 layer = preprocessing.RandomShear( x_factor=(3, 3), seed=0, fill_mode="constant", fill_value=fill_value ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) # Some pixels should be replaced with fill value self.assertTrue(tf.math.reduce_any(xs[0] == fill_value)) self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) self.assertTrue(tf.math.reduce_any(xs[1] == fill_value)) self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) + self.assertTrue( + tf.math.reduce_any(ys_segmentation_masks[0] == fill_value) + ) + self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[0] == 2.0)) + self.assertTrue( + tf.math.reduce_any(ys_segmentation_masks[1] == fill_value) + ) + self.assertTrue(tf.math.reduce_any(ys_segmentation_masks[1] == 1.0)) def test_return_shapes(self): """test return dict keys and value pairs""" @@ -55,6 +69,9 @@ def test_return_shapes(self): "classes": tf.random.uniform((2, 3), 0, 1), } + # randomly sample segmentation masks + ys_segmentation_masks = tf.ones((2, 512, 512, 3)) + layer = preprocessing.RandomShear( x_factor=(0.1, 0.3), y_factor=(0.1, 0.3), @@ -68,18 +85,21 @@ def test_return_shapes(self): "images": xs, "targets": ys_labels, "bounding_boxes": ys_bounding_boxes, + "segmentation_masks": ys_segmentation_masks, } ) - xs, ys_labels, ys_bounding_boxes = ( + xs, ys_labels, ys_bounding_boxes, ys_segmentation_masks = ( outputs["images"], outputs["targets"], outputs["bounding_boxes"], + outputs["segmentation_masks"], ) ys_bounding_boxes = bounding_box.to_dense(ys_bounding_boxes) self.assertEqual(xs.shape, [2, 512, 512, 3]) self.assertEqual(ys_labels.shape, [2, 10]) self.assertEqual(ys_bounding_boxes["boxes"].shape, [2, 3, 4]) self.assertEqual(ys_bounding_boxes["classes"].shape, [2, 3]) + self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3]) def test_single_image_input(self): """test for single image input""" From c740f81b59fbf3830c2f0c0131e84e872b4022f1 Mon Sep 17 00:00:00 2001 From: Aritra Roy Gosthipaty Date: Wed, 23 Aug 2023 01:46:19 +0530 Subject: [PATCH 13/17] [RandomZoom] Supporting Segmentation Masks (#2010) * chore: initial commit * chore: adding initial tests * update: readme for preprocessing * fix lint --- .../segmentation/random_zoom_demo.py | 33 +++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- keras_cv/layers/preprocessing/random_zoom.py | 25 +++++++++++++ .../layers/preprocessing/random_zoom_test.py | 37 ++++++++++++++----- 4 files changed, 86 insertions(+), 11 deletions(-) create mode 100644 examples/layers/preprocessing/segmentation/random_zoom_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_zoom_demo.py b/examples/layers/preprocessing/segmentation/random_zoom_demo.py new file mode 100644 index 0000000000..17a6f84536 --- /dev/null +++ 
b/examples/layers/preprocessing/segmentation/random_zoom_demo.py @@ -0,0 +1,33 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_zoom_demo.py shows how to use the RandomZoom preprocessing layer. +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomzoom = preprocessing.RandomZoom(0.5, 0.5) + ds = ds.map(randomzoom, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index 4f077d8cec..62f0b00f0d 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -39,7 +39,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomSharpness | ✅ | ✅ | ✅ | ✅ | | RandomShear | ✅ | ✅ | ✅ | ✅ | | RandomTranslation | ✅ | ✅ | ✅ | ✅ | -| RandomZoom | ✅ | ❌ | ❌ | ✅ | +| RandomZoom | ✅ | ✅ | ❌ | ✅ | | RepeatedAugmentation + | - | - | - | - | | Rescaling | ❌ | ✅ | ✅ | ✅ | | Resizing | ❌ | ✅ | ✅ | ❌ | diff --git a/keras_cv/layers/preprocessing/random_zoom.py b/keras_cv/layers/preprocessing/random_zoom.py index 98b08782bc..2e860da649 100644 --- a/keras_cv/layers/preprocessing/random_zoom.py +++ b/keras_cv/layers/preprocessing/random_zoom.py @@ -193,6 +193,31 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + segmentation_masks = preprocessing_utils.ensure_tensor( + segmentation_masks, self.compute_dtype + ) + original_shape = segmentation_masks.shape + mask_shape = tf.shape(segmentation_masks) + mask_hd = tf.cast(mask_shape[H_AXIS], tf.float32) + mask_wd = tf.cast(mask_shape[W_AXIS], tf.float32) + width_zooms = transformations["width_zooms"] + height_zooms = transformations["height_zooms"] + zooms = tf.cast( + tf.concat([width_zooms, height_zooms], axis=1), dtype=tf.float32 + ) + outputs = preprocessing_utils.transform( + segmentation_masks, + self.get_zoom_matrix(zooms, mask_hd, mask_wd), + fill_mode=self.fill_mode, + fill_value=self.fill_value, + interpolation="nearest", + ) + outputs.set_shape(original_shape) + return outputs + def get_zoom_matrix(self, zooms, image_height, image_width, name=None): """Returns projective transform(s) for the given zoom(s). 
diff --git a/keras_cv/layers/preprocessing/random_zoom_test.py b/keras_cv/layers/preprocessing/random_zoom_test.py index 219bc55779..0fdcf6eec3 100644 --- a/keras_cv/layers/preprocessing/random_zoom_test.py +++ b/keras_cv/layers/preprocessing/random_zoom_test.py @@ -35,20 +35,37 @@ def test_output_shapes(self, height_factor, width_factor): orig_height = 5 orig_width = 8 channels = 3 - input = tf.random.uniform( - shape=[num_samples, orig_height, orig_width, channels], - ) + input = { + "images": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, channels], + ), + "segmentation_masks": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, 1], + minval=0, + maxval=2, + ), + } layer = RandomZoom(height_factor, width_factor) actual_output = layer(input) - expected_output = tf.random.uniform( - shape=( - num_samples, - orig_height, - orig_width, - channels, + expected_output = { + "images": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, channels], ), + "segmentation_masks": tf.random.uniform( + shape=[num_samples, orig_height, orig_width, 1], + minval=0, + maxval=2, + ), + } + # Check output shape of images + self.assertAllEqual( + expected_output["images"].shape, actual_output["images"].shape + ) + # Check output shape of segmentation masks + self.assertAllEqual( + expected_output["segmentation_masks"].shape, + actual_output["segmentation_masks"].shape, ) - self.assertAllEqual(expected_output.shape, actual_output.shape) def test_random_zoom_in_numeric(self): for dtype in (np.int64, np.float32): From d01aee44a73c228c1981ffd913fd7a96ad40b0c0 Mon Sep 17 00:00:00 2001 From: Piyush Thakur <53268607+cosmo3769@users.noreply.github.com> Date: Wed, 23 Aug 2023 02:11:44 +0530 Subject: [PATCH 14/17] Add support of segmentation mask in RandomCutout (#2004) * added support of segmentation-mask * added demo * add test * update readme * random cutout removed from mask * update test --- .../segmentation/random_cutout_demo.py | 34 +++++++++++++++++++ keras_cv/layers/preprocessing/README.md | 2 +- .../layers/preprocessing/random_cutout.py | 5 +++ .../preprocessing/random_cutout_test.py | 6 ++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 examples/layers/preprocessing/segmentation/random_cutout_demo.py diff --git a/examples/layers/preprocessing/segmentation/random_cutout_demo.py b/examples/layers/preprocessing/segmentation/random_cutout_demo.py new file mode 100644 index 0000000000..71c3631b9b --- /dev/null +++ b/examples/layers/preprocessing/segmentation/random_cutout_demo.py @@ -0,0 +1,34 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_cutout_demo.py shows how to use the RandomCutout preprocessing layer. + +Uses the oxford iiit pet_dataset. In this script the pets +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. 
+""" +import demo_utils +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +def main(): + ds = demo_utils.load_oxford_iiit_pet_dataset() + randomcutout = preprocessing.RandomCutout(0.5, 0.5) + ds = ds.map(randomcutout, num_parallel_calls=tf.data.AUTOTUNE) + demo_utils.visualize_dataset(ds) + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/README.md b/keras_cv/layers/preprocessing/README.md index 62f0b00f0d..2e4eedf2fd 100644 --- a/keras_cv/layers/preprocessing/README.md +++ b/keras_cv/layers/preprocessing/README.md @@ -29,7 +29,7 @@ The provided table gives an overview of the different augmentation layers availa | RandomContrast | ✅ | ✅ | ✅ | ✅ | | RandomCropAndResize | ❌ | ✅ | ✅ | ❌ | | RandomCrop | ✅ | ❌ | ✅ | ✅ | -| RandomCutout | ❌ | ❌ | ❌ | ✅ | +| RandomCutout | ❌ | ✅ | ❌ | ✅ | | RandomFlip | ✅ | ✅ | ✅ | ✅ | | RandomGaussianBlur | ❌ | ✅ | ✅ | ✅ | | RandomHue | ✅ | ✅ | ✅ | ✅ | diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py index 24aed8c455..4eb4bb4a24 100644 --- a/keras_cv/layers/preprocessing/random_cutout.py +++ b/keras_cv/layers/preprocessing/random_cutout.py @@ -120,6 +120,11 @@ def augment_image(self, image, transformation=None, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label + def augment_segmentation_mask( + self, segmentation_masks, transformation=None, **kwargs + ): + return segmentation_masks + def _compute_rectangle_position(self, inputs): input_shape = tf.shape(inputs) image_height, image_width = ( diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py index 818486749b..14930b6fd1 100644 --- a/keras_cv/layers/preprocessing/random_cutout_test.py +++ b/keras_cv/layers/preprocessing/random_cutout_test.py @@ -45,23 +45,29 @@ def _run_test(self, height_factor, width_factor): def test_return_shapes(self): xs = np.ones((2, 512, 512, 3)) + ys_segmentation_masks = np.ones((2, 512, 512, 3)) layer = preprocessing.RandomCutout( height_factor=0.5, width_factor=0.5, seed=1 ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) self.assertEqual(xs.shape, [2, 512, 512, 3]) + self.assertEqual(ys_segmentation_masks.shape, [2, 512, 512, 3]) def test_return_shapes_single_element(self): xs = np.ones((512, 512, 3)) + ys_segmentation_masks = np.ones((512, 512, 3)) layer = preprocessing.RandomCutout( height_factor=0.5, width_factor=0.5, seed=1 ) xs = layer(xs) + ys_segmentation_masks = layer(ys_segmentation_masks) self.assertEqual(xs.shape, [512, 512, 3]) + self.assertEqual(ys_segmentation_masks.shape, [512, 512, 3]) def test_random_cutout_single_float(self): self._run_test(0.5, 0.5) From b038f583164fdc950acfffb599fa57f77057cc87 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Thu, 24 Aug 2023 14:34:20 -0600 Subject: [PATCH 15/17] Remove forward slashes from layer names for backbones (#2037) --- .../backbones/densenet/densenet_backbone.py | 6 +++--- .../mobilenet_v3/mobilenet_v3_backbone.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/keras_cv/models/backbones/densenet/densenet_backbone.py b/keras_cv/models/backbones/densenet/densenet_backbone.py index 98b9eea0e5..28109b64fa 100644 --- a/keras_cv/models/backbones/densenet/densenet_backbone.py +++ b/keras_cv/models/backbones/densenet/densenet_backbone.py @@ -88,12 +88,12 @@ def __init__( x = keras.layers.Rescaling(1 / 255.0)(x) 
x = keras.layers.Conv2D( - 64, 7, strides=2, use_bias=False, padding="same", name="conv1/conv" + 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv" )(x) x = keras.layers.BatchNormalization( - axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1/bn" + axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1_bn" )(x) - x = keras.layers.Activation("relu", name="conv1/relu")(x) + x = keras.layers.Activation("relu", name="conv1_relu")(x) x = keras.layers.MaxPooling2D( 3, strides=2, padding="same", name="pool1" )(x) diff --git a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py index f92d177e99..bd033c282f 100644 --- a/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_cv/models/backbones/mobilenet_v3/mobilenet_v3_backbone.py @@ -126,7 +126,7 @@ def __init__( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv/BatchNorm", + name="Conv_BatchNorm", )(x) x = apply_hard_swish(x) @@ -161,7 +161,7 @@ def __init__( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv_1/BatchNorm", + name="Conv_1_BatchNorm", )(x) x = apply_hard_swish(x) @@ -291,11 +291,11 @@ def apply_inverted_res_block( activation = keras.activations.get(activation) shortcut = x - prefix = "expanded_conv/" + prefix = "expanded_conv_" infilters = x.shape[CHANNEL_AXIS] if expansion_index > 0: - prefix = f"expanded_conv_{expansion_index}/" + prefix = f"expanded_conv_{expansion_index}_" x = keras.layers.Conv2D( adjust_channels(infilters * expansion), @@ -308,14 +308,14 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "expand/BatchNorm", + name=prefix + "expand_BatchNorm", )(x) x = activation(x) if stride == 2: x = keras.layers.ZeroPadding2D( padding=utils.correct_pad_downsample(x, kernel_size), - name=prefix + "depthwise/pad", + name=prefix + "depthwise_pad", )(x) x = keras.layers.DepthwiseConv2D( @@ -329,7 +329,7 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "depthwise/BatchNorm", + name=prefix + "depthwise_BatchNorm", )(x) x = activation(x) @@ -353,7 +353,7 @@ def apply_inverted_res_block( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name=prefix + "project/BatchNorm", + name=prefix + "project_BatchNorm", )(x) if stride == 1 and infilters == filters: From ab812d1b4d6414fac0100238c982472dc5d6e8c6 Mon Sep 17 00:00:00 2001 From: David Landup <60978046+DavidLandup0@users.noreply.github.com> Date: Fri, 25 Aug 2023 00:56:39 +0200 Subject: [PATCH 16/17] [DeepVision Port] SegFormer and Mix-Transformers (#1946) * initial dump * add all basic layers, port roughly to keras core ops * updated .gitignore * segformer head and formatting * cleanup * remove tf call * remove tf * migrating to more keras ops * cleanups and fixes * fix reshaping * comments * from presets api, keras.ops -> ops * embed_dims -> embedding_dims * addressing some PR comments * docstrings, argument update * depths arg * sync * compute output shapes * segformer progress * head * softmax * remove softmax * undo compute_output_shapes() * efficientmultiheadattention -> segformermultiheadattention * docstrings * softmax output * segformer presets * updating segformer presets * segformer presets * import aliases * refactoring * pr comments * pr comments * add aliases * aliases ot init * refactor fix * import keras_cv_export * fix presets/aliases and add copyright * linter warnings * linter errors * 
consistency in presets * return config * fix serialization * Some cleanup + more tests * Fix DropPath layer (need to update tests + add shim for tf.keras * Finish DropPath layer * Use static shape in backbone * Formatting * Switch back to ops.shape * documentation * documentation * remove default num classes * fix docs --------- Co-authored-by: ianjjohnson <3072903+ianstenbit@users.noreply.github.com> --- .gitignore | 1 + keras_cv/backend/__init__.py | 1 + keras_cv/backend/random.py | 20 ++ keras_cv/layers/__init__.py | 9 + .../hierarchical_transformer_encoder.py | 140 ++++++++++ .../layers/overlapping_patching_embedding.py | 85 ++++++ keras_cv/layers/regularization/drop_path.py | 20 +- .../layers/regularization/drop_path_test.py | 18 +- .../layers/segformer_multihead_attention.py | 132 +++++++++ keras_cv/models/__init__.py | 28 ++ .../backbones/mix_transformer/__init__.py | 13 + .../mix_transformer_aliases.py | 262 ++++++++++++++++++ .../mix_transformer_backbone.py | 188 +++++++++++++ .../mix_transformer_backbone_presets.py | 153 ++++++++++ .../mix_transformer_backbone_presets_test.py | 100 +++++++ .../mix_transformer_backbone_test.py | 69 +++++ keras_cv/models/segmentation/__init__.py | 1 + .../models/segmentation/segformer/__init__.py | 15 + .../segmentation/segformer/segformer.py | 175 ++++++++++++ .../segformer/segformer_aliases.py | 244 ++++++++++++++++ .../segformer/segformer_presets.py | 105 +++++++ .../segmentation/segformer/segformer_test.py | 92 ++++++ 22 files changed, 1855 insertions(+), 16 deletions(-) create mode 100644 keras_cv/backend/random.py create mode 100644 keras_cv/layers/hierarchical_transformer_encoder.py create mode 100644 keras_cv/layers/overlapping_patching_embedding.py create mode 100644 keras_cv/layers/segformer_multihead_attention.py create mode 100644 keras_cv/models/backbones/mix_transformer/__init__.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py create mode 100644 keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py create mode 100644 keras_cv/models/segmentation/segformer/__init__.py create mode 100644 keras_cv/models/segmentation/segformer/segformer.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_aliases.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_presets.py create mode 100644 keras_cv/models/segmentation/segformer/segformer_test.py diff --git a/.gitignore b/.gitignore index 6a59b32803..68d68189bd 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ __pycache__/ .vscode/ .devcontainer/ .coverage +.history diff --git a/keras_cv/backend/__init__.py b/keras_cv/backend/__init__.py index da703722b9..7440acbd38 100644 --- a/keras_cv/backend/__init__.py +++ b/keras_cv/backend/__init__.py @@ -76,6 +76,7 @@ from keras_cv.backend import config # noqa: E402 from keras_cv.backend import ops # noqa: E402 +from keras_cv.backend import random # noqa: E402 from keras_cv.backend import tf_ops # noqa: E402 diff --git a/keras_cv/backend/random.py b/keras_cv/backend/random.py new file mode 100644 index 0000000000..21d4b08c7d --- /dev/null +++ b/keras_cv/backend/random.py @@ -0,0 +1,20 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.backend.config import multi_backend + +if multi_backend(): + from keras_core.random import * # noqa: F403, F401 +else: + from keras_core.src.backend.tensorflow.random import * # noqa: F403, F401 diff --git a/keras_cv/layers/__init__.py b/keras_cv/layers/__init__.py index c8b01f2769..342a942f64 100644 --- a/keras_cv/layers/__init__.py +++ b/keras_cv/layers/__init__.py @@ -19,6 +19,9 @@ from keras_cv.layers.augmenter import Augmenter from keras_cv.layers.feature_pyramid import FeaturePyramid from keras_cv.layers.fusedmbconv import FusedMBConvBlock +from keras_cv.layers.hierarchical_transformer_encoder import ( + HierarchicalTransformerEncoder, +) from keras_cv.layers.mbconv import MBConvBlock from keras_cv.layers.object_detection.anchor_generator import AnchorGenerator from keras_cv.layers.object_detection.box_matcher import BoxMatcher @@ -32,6 +35,9 @@ CenterNetLabelEncoder, ) from keras_cv.layers.object_detection_3d.voxelization import DynamicVoxelization +from keras_cv.layers.overlapping_patching_embedding import ( + OverlappingPatchingAndEmbedding, +) from keras_cv.layers.preprocessing.aug_mix import AugMix from keras_cv.layers.preprocessing.auto_contrast import AutoContrast from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( @@ -124,6 +130,9 @@ from keras_cv.layers.regularization.dropblock_2d import DropBlock2D from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D from keras_cv.layers.regularization.stochastic_depth import StochasticDepth +from keras_cv.layers.segformer_multihead_attention import ( + SegFormerMultiheadAttention, +) from keras_cv.layers.spatial_pyramid import SpatialPyramidPooling from keras_cv.layers.transformer_encoder import TransformerEncoder from keras_cv.layers.vit_layers import PatchingAndEmbedding diff --git a/keras_cv/layers/hierarchical_transformer_encoder.py b/keras_cv/layers/hierarchical_transformer_encoder.py new file mode 100644 index 0000000000..ee67a17b56 --- /dev/null +++ b/keras_cv/layers/hierarchical_transformer_encoder.py @@ -0,0 +1,140 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.layers.regularization.drop_path import DropPath +from keras_cv.layers.segformer_multihead_attention import ( + SegFormerMultiheadAttention, +) + + +@keras_cv_export("keras_cv.layers.HierarchicalTransformerEncoder") +class HierarchicalTransformerEncoder(keras.layers.Layer): + """ + Hierarchical transformer encoder block implementation as a Keras Layer. + The layer uses `SegFormerMultiheadAttention` as a `MultiHeadAttention` + alternative for computational efficiency, and is meant to be used + within the SegFormer architecture. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection of the + encoder, and output of the `SegFormerMultiheadAttention` layer. + Due to the residual addition the input dimensionality has to be + equal to the output dimensionality. + num_heads: integer, the number of heads for the + `SegFormerMultiheadAttention` layer. + drop_prob: float, the probability of dropping a random + sample using the `DropPath` layer. Defaults to `0.0`. + layer_norm_epsilon: float, the epsilon for + `LayerNormalization` layers. Defaults to `1e-06` + sr_ratio: integer, the ratio to use within + `SegFormerMultiheadAttention`. If set to > 1, a `Conv2D` + layer is used to reduce the length of the sequence. Defaults to `1`. 
+ + Basic usage: + + ``` + project_dim = 1024 + num_heads = 4 + patch_size = 16 + + encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding( + project_dim=project_dim, patch_size=patch_size)(img_batch) + + trans_encoded = keras_cv.layers.HierarchicalTransformerEncoder(project_dim=project_dim, + num_heads=num_heads, + sr_ratio=1)(encoded_patches) + + print(trans_encoded.shape) # (1, 3136, 1024) + ``` + """ + + def __init__( + self, + project_dim, + num_heads, + sr_ratio=1, + drop_prob=0.0, + layer_norm_epsilon=1e-6, + **kwargs, + ): + super().__init__(**kwargs) + self.project_dim = project_dim + self.num_heads = num_heads + self.drop_prop = drop_prob + + self.norm1 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon) + self.attn = SegFormerMultiheadAttention( + project_dim, num_heads, sr_ratio + ) + self.drop_path = DropPath(drop_prob) + self.norm2 = keras.layers.LayerNormalization(epsilon=layer_norm_epsilon) + self.mlp = self.MixFFN( + channels=project_dim, + mid_channels=int(project_dim * 4), + ) + + def build(self, input_shape): + super().build(input_shape) + self.H = ops.sqrt(ops.cast(input_shape[1], "float32")) + self.W = ops.sqrt(ops.cast(input_shape[2], "float32")) + + def call(self, x): + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + def get_config(self): + config = super().get_config() + config.update( + { + "mlp": keras.saving.serialize_keras_object(self.mlp), + "project_dim": self.project_dim, + "num_heads": self.num_heads, + "drop_prop": self.drop_prop, + } + ) + return config + + class MixFFN(keras.layers.Layer): + def __init__(self, channels, mid_channels): + super().__init__() + self.fc1 = keras.layers.Dense(mid_channels) + self.dwconv = keras.layers.DepthwiseConv2D( + kernel_size=3, + strides=1, + padding="same", + ) + self.fc2 = keras.layers.Dense(channels) + + def call(self, x): + x = self.fc1(x) + shape = ops.shape(x) + H, W = int(math.sqrt(shape[1])), int(math.sqrt(shape[1])) + B, C = shape[0], shape[2] + x = ops.reshape(x, (B, H, W, C)) + x = self.dwconv(x) + x = ops.reshape(x, (B, -1, C)) + x = ops.nn.gelu(x) + x = self.fc2(x) + return x diff --git a/keras_cv/layers/overlapping_patching_embedding.py b/keras_cv/layers/overlapping_patching_embedding.py new file mode 100644 index 0000000000..69060087ec --- /dev/null +++ b/keras_cv/layers/overlapping_patching_embedding.py @@ -0,0 +1,85 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops + + +@keras_cv_export("keras_cv.layers.OverlappingPatchingAndEmbedding") +class OverlappingPatchingAndEmbedding(keras.layers.Layer): + def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs): + """ + Overlapping Patching and Embedding layer. Differs from `PatchingAndEmbedding` + in that the patch size does not affect the sequence length. It's fully derived + from the `stride` parameter. 
Additionally, no positional embedding is done + as part of the layer - only a projection using a `Conv2D` layer. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [Official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/hierarchical_transformer_encoder.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection. + Defaults to `32`. + patch_size: integer, the size of the patches to encode. + Defaults to `7`. + stride: integer, the stride to use for the patching before + projection. Defaults to `5`. + + Basic usage: + + ``` + project_dim = 1024 + patch_size = 16 + + encoded_patches = keras_cv.layers.OverlappingPatchingAndEmbedding( + project_dim=project_dim, patch_size=patch_size)(img_batch) + + print(encoded_patches.shape) # (1, 3136, 1024) + ``` + """ + super().__init__(**kwargs) + + self.project_dim = project_dim + self.patch_size = patch_size + self.stride = stride + + self.proj = keras.layers.Conv2D( + filters=project_dim, + kernel_size=patch_size, + strides=stride, + padding="same", + ) + self.norm = keras.layers.LayerNormalization() + + def call(self, x): + x = self.proj(x) + # B, H, W, C + shape = x.shape + x = ops.reshape(x, (-1, shape[1] * shape[2], shape[3])) + x = self.norm(x) + return x + + def get_config(self): + config = super().get_config() + config.update( + { + "project_dim": self.project_dim, + "patch_size": self.patch_size, + "stride": self.stride, + } + ) + return config diff --git a/keras_cv/layers/regularization/drop_path.py b/keras_cv/layers/regularization/drop_path.py index e254f29493..4475e2365f 100644 --- a/keras_cv/layers/regularization/drop_path.py +++ b/keras_cv/layers/regularization/drop_path.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from tensorflow import keras - from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.backend import random @keras_cv_export("keras_cv.layers.DropPath") -class DropPath(keras.__internal__.layers.BaseRandomLayer): +class DropPath(keras.layers.Layer): """ Implements the DropPath layer. DropPath randomly drops samples during training with a probability of `rate`. 
Note that this layer drops individual @@ -47,7 +48,7 @@ class DropPath(keras.__internal__.layers.BaseRandomLayer): """ # noqa: E501 def __init__(self, rate=0.5, seed=None, **kwargs): - super().__init__(seed=seed, **kwargs) + super().__init__(**kwargs) self.rate = rate self.seed = seed @@ -55,12 +56,13 @@ def call(self, x, training=None): if self.rate == 0.0 or not training: return x else: - keep_prob = 1 - self.rate - drop_map_shape = (x.shape[0],) + (1,) * (len(x.shape) - 1) - drop_map = keras.backend.random_bernoulli( - drop_map_shape, p=keep_prob, seed=self.seed + batch_size = x.shape[0] or ops.shape(x)[0] + drop_map_shape = (batch_size,) + (1,) * (len(x.shape) - 1) + drop_map = ops.cast( + random.uniform(drop_map_shape, seed=self.seed) > self.rate, + x.dtype, ) - x = x / keep_prob + x = x / (1.0 - self.rate) x = x * drop_map return x diff --git a/keras_cv/layers/regularization/drop_path_test.py b/keras_cv/layers/regularization/drop_path_test.py index 22f63b5223..00b4b790f0 100644 --- a/keras_cv/layers/regularization/drop_path_test.py +++ b/keras_cv/layers/regularization/drop_path_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np +import pytest import tensorflow as tf from keras_cv.layers import DropPath @@ -23,7 +25,7 @@ class DropPathTest(TestCase): def test_input_unchanged_in_eval_mode(self): layer = DropPath(rate=0.5, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=False) @@ -31,7 +33,7 @@ def test_input_unchanged_in_eval_mode(self): def test_input_unchanged_with_rate_equal_to_zero(self): layer = DropPath(rate=0, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=True) @@ -39,7 +41,7 @@ def test_input_unchanged_with_rate_equal_to_zero(self): def test_input_gets_partially_zeroed_out_in_train_mode(self): layer = DropPath(rate=0.2, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) outputs = layer(inputs, training=True) @@ -48,9 +50,11 @@ def test_input_gets_partially_zeroed_out_in_train_mode(self): self.assertGreaterEqual(non_zeros_inputs, non_zeros_outputs) + # Because randomness is inconsistent across backends, we just test with 1. + @pytest.mark.tf_keras_only def test_strict_input_gets_partially_zeroed_out_in_train_mode(self): - layer = DropPath(rate=0.5, seed=42) - inputs = tf.random.uniform(self.FEATURE_SHAPE) + layer = DropPath(rate=0.5, seed=10) + inputs = np.random.uniform(size=self.FEATURE_SHAPE) total_non_zero_inputs = 0 total_non_zero_outputs = 0 @@ -66,6 +70,6 @@ def test_strict_input_gets_partially_zeroed_out_in_train_mode(self): self.assertAllInRange( total_non_zero_outputs, - int(0.49 * tf.cast(total_non_zero_inputs, tf.float32)), - int(0.51 * tf.cast(total_non_zero_inputs, tf.float32)), + int(0.40 * tf.cast(total_non_zero_inputs, tf.float32)), + int(0.60 * tf.cast(total_non_zero_inputs, tf.float32)), ) diff --git a/keras_cv/layers/segformer_multihead_attention.py b/keras_cv/layers/segformer_multihead_attention.py new file mode 100644 index 0000000000..203773d4ea --- /dev/null +++ b/keras_cv/layers/segformer_multihead_attention.py @@ -0,0 +1,132 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops + + +@keras_cv_export("keras_cv.layers.SegFormerMultiheadAttention") +class SegFormerMultiheadAttention(keras.layers.Layer): + def __init__(self, project_dim, num_heads, sr_ratio): + """ + Efficient MultiHeadAttention implementation as a Keras layer. + A huge bottleneck in scaling transformers is the self-attention layer + with an O(n^2) complexity. + + SegFormerMultiheadAttention performs a sequence reduction (SR) operation + with a given ratio, to reduce the sequence length before performing key and value projections, + reducing the O(n^2) complexity to O(n^2/R) where R is the sequence reduction ratio. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) # noqa: E501 + - [NVlabs' official implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) # noqa: E501 + - [@sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py) # noqa: E501 + - [Ported from the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/layers/efficient_attention.py) # noqa: E501 + + Args: + project_dim: integer, the dimensionality of the projection + of the `SegFormerMultiheadAttention` layer. + num_heads: integer, the number of heads to use in the + attention computation. + sr_ratio: integer, the sequence reduction ratio to perform + on the sequence before key and value projections. 
+ + Basic usage: + + ``` + tensor = tf.random.uniform([1, 196, 32]) + output = keras_cv.layers.SegFormerMultiheadAttention(project_dim=768, + num_heads=2, + sr_ratio=4)(tensor) + print(output.shape) # (1, 196, 32) + ``` + """ + super().__init__() + self.num_heads = num_heads + self.sr_ratio = sr_ratio + self.scale = (project_dim // num_heads) ** -0.5 + self.q = keras.layers.Dense(project_dim) + self.k = keras.layers.Dense(project_dim) + self.v = keras.layers.Dense(project_dim) + self.proj = keras.layers.Dense(project_dim) + + if sr_ratio > 1: + self.sr = keras.layers.Conv2D( + filters=project_dim, + kernel_size=sr_ratio, + strides=sr_ratio, + padding="same", + ) + self.norm = keras.layers.LayerNormalization() + + def call(self, x): + input_shape = ops.shape(x) + H, W = int(math.sqrt(input_shape[1])), int(math.sqrt(input_shape[1])) + B, C = input_shape[0], input_shape[2] + + q = self.q(x) + q = ops.reshape( + q, + ( + input_shape[0], + input_shape[1], + self.num_heads, + input_shape[2] // self.num_heads, + ), + ) + q = ops.transpose(q, [0, 2, 1, 3]) + + if self.sr_ratio > 1: + x = ops.reshape( + ops.transpose(x, [0, 2, 1]), + (B, H, W, C), + ) + x = self.sr(x) + x = ops.reshape(x, [input_shape[0], input_shape[2], -1]) + x = ops.transpose(x, [0, 2, 1]) + x = self.norm(x) + + k = self.k(x) + v = self.v(x) + + k = ops.transpose( + ops.reshape( + k, + [B, -1, self.num_heads, C // self.num_heads], + ), + [0, 2, 1, 3], + ) + + v = ops.transpose( + ops.reshape( + v, + [B, -1, self.num_heads, C // self.num_heads], + ), + [0, 2, 1, 3], + ) + + attn = (q @ ops.transpose(k, [0, 1, 3, 2])) * self.scale + attn = ops.nn.softmax(attn, axis=-1) + + attn = attn @ v + attn = ops.reshape( + ops.transpose(attn, [0, 2, 1, 3]), + [input_shape[0], input_shape[1], input_shape[2]], + ) + + x = self.proj(attn) + return x diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 4191c07575..9c83a3891a 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -112,6 +112,27 @@ from keras_cv.models.backbones.efficientnet_v2.efficientnet_v2_aliases import ( EfficientNetV2SBackbone, ) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB1Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB2Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB3Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB4Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB5Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTBackbone, +) from keras_cv.models.backbones.mobilenet_v3.mobilenet_v3_aliases import ( MobileNetV3LargeBackbone, ) @@ -166,5 +187,12 @@ YOLOV8Detector, ) from keras_cv.models.segmentation import DeepLabV3Plus +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormer +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB0 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB1 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB2 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB3 +from keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB4 +from 
keras_cv.models.segmentation.segformer.segformer_aliases import SegFormerB5 from keras_cv.models.stable_diffusion import StableDiffusion from keras_cv.models.stable_diffusion import StableDiffusionV2 diff --git a/keras_cv/models/backbones/mix_transformer/__init__.py b/keras_cv/models/backbones/mix_transformer/__init__.py new file mode 100644 index 0000000000..3992ffb59a --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py new file mode 100644 index 0000000000..7c7ea6a8b6 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_aliases.py @@ -0,0 +1,262 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """MiT model. + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + include_rescaling: bool, whether to rescale the inputs. If set to + True, inputs will be passed through a `Rescaling(scale=1 / 255)` + layer. Defaults to True. + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e., output of `layers.Input()`) + to use as image input for the model. 
+ + Examples: + ```python + input_data = tf.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone + model = {name}Backbone() + output = model(input_data) + ``` +""" # noqa: E501 + + +class MiTB0Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b0", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "mit_b0_imagenet": copy.deepcopy( + backbone_presets["mit_b0_imagenet"] + ), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +class MiTB1Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b1", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB2Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b2", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB3Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b3", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB4Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } + ) + return MiTBackbone.from_preset("mit_b4", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +class MiTB5Backbone(MiTBackbone): + def __new__( + cls, + include_rescaling=True, + input_shape=(224, 224, 3), + input_tensor=None, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "include_rescaling": include_rescaling, + "input_shape": input_shape, + "input_tensor": input_tensor, + } 
+ ) + return MiTBackbone.from_preset("mit_b5", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return {} + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations.""" + return {} + + +setattr( + MiTB0Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB0"), +) + +setattr( + MiTB1Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB1"), +) + +setattr( + MiTB2Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB2"), +) + +setattr( + MiTB3Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB3"), +) + +setattr( + MiTB4Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB4"), +) + +setattr( + MiTB5Backbone, + "__doc__", + ALIAS_DOCSTRING.format(name="MiTB5"), +) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py new file mode 100644 index 0000000000..bf6a1a6ec2 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone.py @@ -0,0 +1,188 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MiT backbone model. + +References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) (CVPR 2021) + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/blob/main/deepvision/models/classification/mix_transformer/mit_tf.py) + - [Based on the NVlabs' official PyTorch implementation](https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/backbones/mix_transformer.py) + - [Inspired by @sithu31296's reimplementation](https://github.com/sithu31296/semantic-segmentation/blob/main/semseg/models/backbones/mit.py) +""" # noqa: E501 + +import copy + +import numpy as np + +from keras_cv import layers as cv_layers +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models import utils +from keras_cv.models.backbones.backbone import Backbone +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets_with_weights, +) +from keras_cv.utils.python_utils import classproperty + + +@keras_cv_export("keras_cv.models.MiTBackbone") +class MiTBackbone(Backbone): + def __init__( + self, + include_rescaling, + depths, + input_shape=(224, 224, 3), + input_tensor=None, + embedding_dims=None, + **kwargs, + ): + """A Keras model implementing the MixTransformer architecture to be + used as a backbone for the SegFormer architecture. 
+ + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/classification/mix_transformer) # noqa: E501 + + Args: + include_rescaling: bool, whether to rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + depths: the number of transformer encoders to be used per stage in the + network + embedding_dims: the embedding dims per hierarchical stage, used as + the levels of the feature pyramid + input_shape: optional shape tuple, defaults to (None, None, 3). + input_tensor: optional Keras tensor (i.e. output of `keras.layers.Input()`) + to use as image input for the model. + + Examples: + + Using the class with a `backbone`: + + ```python + import tensorflow as tf + import keras_cv + + images = np.ones(shape=(1, 96, 96, 3)) + labels = np.zeros(shape=(1, 96, 96, 1)) + backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") + + # Evaluate model + model(images) + + # Train model + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(from_logits=False), + metrics=["accuracy"], + ) + model.fit(images, labels, epochs=3) + ``` + """ + drop_path_rate = 0.1 + dpr = [x for x in np.linspace(0.0, drop_path_rate, sum(depths))] + blockwise_num_heads = [1, 2, 5, 8] + blockwise_sr_ratios = [8, 4, 2, 1] + num_stages = 4 + + cur = 0 + patch_embedding_layers = [] + transformer_blocks = [] + layer_norms = [] + + for i in range(num_stages): + patch_embed_layer = cv_layers.OverlappingPatchingAndEmbedding( + project_dim=embedding_dims[0] if i == 0 else embedding_dims[i], + patch_size=7 if i == 0 else 3, + stride=4 if i == 0 else 2, + name=f"patch_and_embed_{i}", + ) + patch_embedding_layers.append(patch_embed_layer) + + transformer_block = [ + cv_layers.HierarchicalTransformerEncoder( + project_dim=embedding_dims[i], + num_heads=blockwise_num_heads[i], + sr_ratio=blockwise_sr_ratios[i], + drop_prob=dpr[cur + k], + name=f"hierarchical_encoder_{i}_{k}", + ) + for k in range(depths[i]) + ] + transformer_blocks.append(transformer_block) + cur += depths[i] + layer_norms.append(keras.layers.LayerNormalization()) + + inputs = utils.parse_model_inputs(input_shape, input_tensor) + x = inputs + + if include_rescaling: + x = keras.layers.Rescaling(scale=1 / 255)(x) + + pyramid_level_inputs = [] + for i in range(num_stages): + # Compute new height/width after the `proj` + # call in `OverlappingPatchingAndEmbedding` + stride = 4 if i == 0 else 2 + new_height, new_width = ( + int(ops.shape(x)[1] / stride), + int(ops.shape(x)[2] / stride), + ) + + x = patch_embedding_layers[i](x) + for blk in transformer_blocks[i]: + x = blk(x) + x = layer_norms[i](x) + x = keras.layers.Reshape( + (new_height, new_width, -1), name=f"output_level_{i}" + )(x) + pyramid_level_inputs.append(utils.get_tensor_input_name(x)) + + super().__init__(inputs=inputs, outputs=x, **kwargs) + + self.depths = depths + self.embedding_dims = embedding_dims + self.include_rescaling = include_rescaling + self.input_tensor = input_tensor + self.pyramid_level_inputs = { + f"P{i + 1}": name for i, name in enumerate(pyramid_level_inputs) + } + + def get_config(self): + config = super().get_config() + config.update( + { + "depths": self.depths, + "embedding_dims": self.embedding_dims, + "include_rescaling": self.include_rescaling, + "input_shape": self.input_shape[1:], + 
"input_tensor": self.input_tensor, + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(backbone_presets) + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return copy.deepcopy(backbone_presets_with_weights) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py new file mode 100644 index 0000000000..a4c1c2a3e1 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets.py @@ -0,0 +1,153 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MiT model preset configurations.""" + +backbone_presets_no_weights = { + "mit_b0": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [32, 64, 160, 256], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b1": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 13156554, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b2": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 16 transformer blocks." + ), + "params": 24201418, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 4, 6, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b3": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 28 transformer blocks." + ), + "params": 44077258, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 4, 18, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b4": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 41 transformer blocks." + ), + "params": 60847818, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 8, 27, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, + "mit_b5": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 52 transformer blocks." 
+ ), + "params": 81448138, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [64, 128, 320, 512], + "depths": [3, 6, 40, 3], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + }, +} + +backbone_presets_with_weights = { + "mit_b0_imagenet": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks. Pre-trained on ImageNet-1K and scores 69% top-1 accuracy on the validation set." # noqa: E501 + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "class_name": "keras_cv>MiTBackbone", + "config": { + "embedding_dims": [32, 64, 160, 256], + "depths": [2, 2, 2, 2], + "include_rescaling": True, + "input_shape": (224, 224, 3), + "input_tensor": None, + }, + "weights_url": "https://storage.googleapis.com/keras-cv/models/mitb0/imagenet/classification-v0.h5", # noqa: E501 + "weights_hash": "8e0c416cd330b6fa0bcfb3a5ccc43edcbcabf6a463aee3c2a9b6a1398c207d10", # noqa: E501 + }, +} + +backbone_presets = { + **backbone_presets_no_weights, + **backbone_presets_with_weights, +} diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py new file mode 100644 index 0000000000..0bc443ee92 --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py @@ -0,0 +1,100 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for loading pretrained model presets.""" + +import numpy as np +import pytest + +from keras_cv.backend import ops +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.tests.test_case import TestCase + + +@pytest.mark.large +class MixTransformerPresetSmokeTest(TestCase): + """ + A smoke test for MixTransformer presets we run continuously. + This only tests the smallest weights we have available. Run with: + `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_large` # noqa: E501 + """ + + def setUp(self): + self.input_batch = np.ones(shape=(2, 224, 224, 3)) + + def test_backbone_output(self): + model = MiTBackbone.from_preset("mit_b0") + model(self.input_batch) + + def test_backbone_output_with_weights(self): + model = MiTBackbone.from_preset("mit_b0_imagenet") + + # The forward pass from a preset should be stable! + # This test should catch cases where we unintentionally change our + # network code in a way that would invalidate our preset weights. + # We should only update these numbers if we are updating a weights + # file, or have found a discrepancy with the upstream source. 
+ + outputs = model(np.ones(shape=(1, 224, 224, 3))) + expected = [-0.603472, -0.180627, -1.92137, -0.004339, 2.396384] + # Keep a high tolerance, so we are robust to different hardware. + self.assertAllClose( + ops.convert_to_numpy(outputs[0, 0, 0, :5]), + expected, + atol=0.01, + rtol=0.01, + ) + + def test_applications_model_output(self): + model = MiTB0Backbone() + model(self.input_batch) + + def test_applications_model_output_with_preset(self): + model = MiTB0Backbone.from_preset("mit_b0_imagenet") + model(self.input_batch) + + def test_preset_docstring(self): + """Check we did our docstring formatting correctly.""" + for name in MiTBackbone.presets: + self.assertRegex(MiTBackbone.from_preset.__doc__, name) + + def test_unknown_preset_error(self): + # Not a preset name + with self.assertRaises(ValueError): + MiTBackbone.from_preset("mit_b0_clowntown") + + def test_load_weights_error(self): + # Try to load weights when none available + with self.assertRaises(ValueError): + MiTBackbone.from_preset("mit_b0", load_weights=True) + + +@pytest.mark.extra_large +class MixTransformerPresetFullTest(TestCase): + """ + Test the full enumeration of our preset. + This tests every preset for Mix Transformer and is only run manually. + Run with: + `pytest keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_presets_test.py --run_extra_large` # noqa: E501 + """ + + def test_load_mix_transformer(self): + input_data = np.ones(shape=(2, 224, 224, 3)) + for preset in MiTBackbone.presets: + model = MiTBackbone.from_preset(preset) + model(input_data) diff --git a/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py new file mode 100644 index 0000000000..f24596bdfe --- /dev/null +++ b/keras_cv/models/backbones/mix_transformer/mix_transformer_backbone_test.py @@ -0,0 +1,69 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import pytest +from absl.testing import parameterized + +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models.backbones.mix_transformer.mix_transformer_aliases import ( + MiTB0Backbone, +) +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_cv.tests.test_case import TestCase + + +class MixTransformerBackboneTest(TestCase): + def setUp(self): + self.input_batch = np.ones(shape=(2, 224, 224, 3)) + + def test_valid_call(self): + model = MiTB0Backbone() + model(self.input_batch) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + model = MiTB0Backbone( + include_rescaling=False, + ) + model_output = model(self.input_batch) + save_path = os.path.join(self.get_temp_dir(), "mit_backbone.keras") + model.save(save_path) + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. 
+ self.assertIsInstance(restored_model, MiTBackbone) + + # Check that output matches. + restored_output = restored_model(self.input_batch) + self.assertAllClose( + ops.convert_to_numpy(model_output), + ops.convert_to_numpy(restored_output), + ) + + @parameterized.named_parameters( + ("one_channel", 1), + ("four_channels", 4), + ) + def test_application_variable_input_channels(self, num_channels): + model = MiTB0Backbone( + input_shape=(224, 224, num_channels), + include_rescaling=False, + ) + self.assertEqual(model.output_shape, (None, 7, 7, 256)) diff --git a/keras_cv/models/segmentation/__init__.py b/keras_cv/models/segmentation/__init__.py index 122dc4191e..f25ee4ea7c 100644 --- a/keras_cv/models/segmentation/__init__.py +++ b/keras_cv/models/segmentation/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. from keras_cv.models.segmentation.deeplab_v3_plus import DeepLabV3Plus +from keras_cv.models.segmentation.segformer import SegFormer diff --git a/keras_cv/models/segmentation/segformer/__init__.py b/keras_cv/models/segmentation/segformer/__init__.py new file mode 100644 index 0000000000..59d29582c2 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.models.segmentation.segformer.segformer import SegFormer diff --git a/keras_cv/models/segmentation/segformer/segformer.py b/keras_cv/models/segmentation/segformer/segformer.py new file mode 100644 index 0000000000..0985b13749 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer.py @@ -0,0 +1,175 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.api_export import keras_cv_export +from keras_cv.backend import keras +from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets, +) +from keras_cv.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets_with_weights, +) +from keras_cv.models.task import Task +from keras_cv.utils.python_utils import classproperty +from keras_cv.utils.train import get_feature_extractor + + +@keras_cv_export("keras_cv.models.segmentation.SegFormer") +class SegFormer(Task): + """A Keras model implementing the SegFormer architecture for semantic + segmentation. 
+
+    References:
+        - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501
+        - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501
+
+    Args:
+        backbone: `keras.Model`. The backbone network for the model that is
+            used as a feature extractor for the SegFormer encoder.
+            It is *intended* to be used only with the MiT backbone model which
+            was created specifically for SegFormers. It should either be a
+            `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model`
+            that implements the `pyramid_level_inputs` property with keys
+            "P2", "P3", "P4", and "P5" and layer names as
+            values.
+        num_classes: int, the number of classes for the segmentation model,
+            including the background class.
+        projection_filters: int, number of filters in the
+            convolution layer projecting the concatenated features into
+            a segmentation map. Defaults to `256`.
+
+    Examples:
+
+    Using the class with a `backbone`:
+
+    ```python
+    import numpy as np
+    import tensorflow as tf
+    import keras_cv
+
+    images = np.ones(shape=(1, 96, 96, 3))
+    labels = np.zeros(shape=(1, 96, 96, 1))
+    backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+    model = keras_cv.models.segmentation.SegFormer(
+        num_classes=1, backbone=backbone,
+    )
+
+    # Evaluate model
+    model(images)
+
+    # Train model
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
+        metrics=["accuracy"],
+    )
+    model.fit(images, labels, epochs=3)
+    ```
+    """
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        projection_filters=256,
+        **kwargs,
+    ):
+        if not isinstance(backbone, keras.layers.Layer) or not isinstance(
+            backbone, keras.Model
+        ):
+            raise ValueError(
+                "Argument `backbone` must be a `keras.layers.Layer` instance "
+                "or `keras.Model`. Received instead: "
+                f"backbone={backbone} (of type {type(backbone)})."
+ ) + + inputs = backbone.input + + feature_extractor = get_feature_extractor( + backbone, list(backbone.pyramid_level_inputs.values()) + ) + # Multi-level dictionary + features = list(feature_extractor(inputs).values()) + + # Get H and W of level one output + _, H, W, _ = features[0].shape + # Project all multi-level outputs onto the same dimensionality + # and feature map shape + multi_layer_outs = [] + for feature_dim, feature in zip(backbone.embedding_dims, features): + out = keras.layers.Dense( + projection_filters, name=f"linear_{feature_dim}" + )(feature) + out = keras.layers.Resizing(H, W, interpolation="bilinear")(out) + multi_layer_outs.append(out) + + # Concat now-equal feature maps + concatenated_outs = keras.layers.Concatenate(axis=3)( + multi_layer_outs[::-1] + ) + + # Fuse concatenated features into a segmentation map + seg = keras.Sequential( + [ + keras.layers.Conv2D( + filters=projection_filters, kernel_size=1, use_bias=False + ), + keras.layers.BatchNormalization(), + keras.layers.Activation("relu"), + ] + )(concatenated_outs) + + seg = keras.layers.Dropout(0.1)(seg) + seg = keras.layers.Conv2D( + filters=num_classes, kernel_size=1, activation="softmax" + )(seg) + + output = keras.layers.Resizing( + height=inputs.shape[1], + width=inputs.shape[2], + interpolation="bilinear", + )(seg) + + super().__init__( + inputs=inputs, + outputs=output, + **kwargs, + ) + + self.num_classes = num_classes + self.projection_filters = projection_filters + self.backbone = backbone + + def get_config(self): + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "projection_filters": self.projection_filters, + "backbone": keras.saving.serialize_keras_object(self.backbone), + } + ) + return config + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(presets) + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return copy.deepcopy(presets_with_weights) diff --git a/keras_cv/models/segmentation/segformer/segformer_aliases.py b/keras_cv/models/segmentation/segformer/segformer_aliases.py new file mode 100644 index 0000000000..03547f60f2 --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_aliases.py @@ -0,0 +1,244 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.models.segmentation.segformer.segformer import SegFormer +from keras_cv.models.segmentation.segformer.segformer_presets import presets +from keras_cv.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """SegFormer model. + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + backbone: a KerasCV backbone for feature extraction. + num_classes: the number of classes for segmentation, including the background class. 
+
+    Examples:
+    ```python
+    input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Pretrained backbone
+    backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet")
+    segformer = keras_cv.models.SegFormer(backbone=backbone, num_classes=19)
+    output = segformer(input_data)
+    ```
+"""  # noqa: E501
+
+
+class SegFormerB0(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b0", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b0": copy.deepcopy(presets["segformer_b0"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB1(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b1", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b1": copy.deepcopy(presets["segformer_b1"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB2(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b2", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b2": copy.deepcopy(presets["segformer_b2"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB3(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b3", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b3": copy.deepcopy(presets["segformer_b3"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB4(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b4", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b4": copy.deepcopy(presets["segformer_b4"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that include
+        weights."""
+        return cls.presets
+
+
+class SegFormerB5(SegFormer):
+    def __new__(
+        cls,
+        num_classes,
+        **kwargs,
+    ):
+        # Pack args in kwargs
+        kwargs.update(
+            {
+                "num_classes": num_classes,
+            }
+        )
+        return SegFormer.from_preset("segformer_b5", **kwargs)
+
+    @classproperty
+    def presets(cls):
+        """Dictionary of preset names and configurations."""
+        return {
+            "segformer_b5": copy.deepcopy(presets["segformer_b5"]),
+        }
+
+    @classproperty
+    def presets_with_weights(cls):
+        """Dictionary of preset names and configurations that
include + weights.""" + return cls.presets + + +setattr( + SegFormerB0, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB0"), +) + +setattr( + SegFormerB1, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB1"), +) + +setattr( + SegFormerB2, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB2"), +) + +setattr( + SegFormerB3, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB3"), +) + +setattr( + SegFormerB4, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB4"), +) + +setattr( + SegFormerB5, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB5"), +) diff --git a/keras_cv/models/segmentation/segformer/segformer_presets.py b/keras_cv/models/segmentation/segformer/segformer_presets.py new file mode 100644 index 0000000000..e19e2ec9ba --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_presets.py @@ -0,0 +1,105 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SegFormer model preset configurations.""" + +from keras_cv.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) + +presets_no_weights = { + "segformer_b0": { + "metadata": { + "description": ("SegFormer model with MiTB0 backbone."), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "class_name": "keras_cv>SegFormer", + "config": { + "backbone": backbone_presets["mit_b0"], + }, + }, + "segformer_b1": { + "metadata": { + "description": ("SegFormer model with MiTB1 backbone."), + "params": 13682643, + "official_name": "SegFormerB1", + "path": "segformer_b1", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b1"]}, + }, + "segformer_b2": { + "metadata": { + "description": ("SegFormer model with MiTB2 backbone."), + "params": 24727507, + "official_name": "SegFormerB2", + "path": "segformer_b2", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b2"]}, + }, + "segformer_b3": { + "metadata": { + "description": ("SegFormer model with MiTB3 backbone."), + "params": 44603347, + "official_name": "SegFormerB3", + "path": "segformer_b3", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b3"]}, + }, + "segformer_b4": { + "metadata": { + "description": ("SegFormer model with MiTB4 backbone."), + "params": 61373907, + "official_name": "SegFormerB4", + "path": "segformer_b4", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b4"]}, + }, + "segformer_b5": { + "metadata": { + "description": ("SegFormer model with MiTB5 backbone."), + "params": 81974227, + "official_name": "SegFormerB5", + "path": "segformer_b5", + }, + "class_name": "keras_cv>SegFormer", + "config": {"backbone": backbone_presets["mit_b5"]}, + }, +} + +presets_with_weights = { + "segformer_b0_imagenet": { + "metadata": { + "description": ( + "SegFormer model with a pretrained MiTB0 backbone." 
+ ), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "class_name": "keras_cv>SegFormer", + "config": { + "backbone": backbone_presets["mit_b0_imagenet"], + }, + }, +} + +presets = { + **presets_no_weights, + **presets_with_weights, +} diff --git a/keras_cv/models/segmentation/segformer/segformer_test.py b/keras_cv/models/segmentation/segformer/segformer_test.py new file mode 100644 index 0000000000..0990e0e88f --- /dev/null +++ b/keras_cv/models/segmentation/segformer/segformer_test.py @@ -0,0 +1,92 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import pytest +import tensorflow as tf + +from keras_cv.backend import keras +from keras_cv.backend import ops +from keras_cv.models import MiTBackbone +from keras_cv.models import SegFormer +from keras_cv.tests.test_case import TestCase + + +class SegFormerTest(TestCase): + def test_segformer_construction(self): + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + @pytest.mark.large + def test_segformer_call(self): + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + images = np.random.uniform(size=(2, 512, 512, 3)) + _ = model(images) + _ = model.predict(images) + + @pytest.mark.large + def test_weights_change(self): + target_size = [512, 512, 2] + + images = tf.ones(shape=[1] + [512, 512, 3]) + labels = tf.zeros(shape=[1] + target_size) + ds = tf.data.Dataset.from_tensor_slices((images, labels)) + ds = ds.repeat(2) + ds = ds.batch(2) + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=2) + + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + original_weights = model.get_weights() + model.fit(ds, epochs=1) + updated_weights = model.get_weights() + + for w1, w2 in zip(original_weights, updated_weights): + self.assertNotAllEqual(w1, w2) + self.assertFalse(ops.any(ops.isnan(w2))) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + target_size = [512, 512, 3] + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=1) + + input_batch = np.ones(shape=[2] + target_size) + model_output = model(input_batch) + + save_path = os.path.join(self.get_temp_dir(), "model.keras") + model.save(save_path, save_format="keras_v3") + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, SegFormer) + + # Check that output matches. 
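+        # (Both outputs are dense per-pixel maps of shape (2, 512, 512, 1)
+        # here; a save/load round trip is expected to reproduce them within
+        # assertAllClose's default tolerances.)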
+ restored_output = restored_model(input_batch) + self.assertAllClose(model_output, restored_output) From acd2681cae2669065207de2623527729a0db0b58 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Fri, 25 Aug 2023 18:33:08 -0600 Subject: [PATCH 17/17] Fix test errors that were introduced by upgrading to Keras Core 0.1.5 (#2041) * Fix build process for spatial pyramid pooling * Fix label encoder for YOLOV8 for 0.1.5 --- keras_cv/layers/spatial_pyramid.py | 10 +++++++++- .../object_detection/yolo_v8/yolo_v8_label_encoder.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/keras_cv/layers/spatial_pyramid.py b/keras_cv/layers/spatial_pyramid.py index 9c9a6849df..b45ee7bda3 100644 --- a/keras_cv/layers/spatial_pyramid.py +++ b/keras_cv/layers/spatial_pyramid.py @@ -91,6 +91,7 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + conv_sequential.build(input_shape) self.aspp_parallel_channels.append(conv_sequential) # Channel 2 and afterwards are based on self.dilation_rates, and each of @@ -109,6 +110,7 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + conv_sequential.build(input_shape) self.aspp_parallel_channels.append(conv_sequential) # Last channel is the global average pooling with conv2D 1x1 kernel. @@ -125,10 +127,11 @@ def build(self, input_shape): keras.layers.Activation(self.activation), ] ) + pool_sequential.build(input_shape) self.aspp_parallel_channels.append(pool_sequential) # Final projection layers - self.projection = keras.Sequential( + projection = keras.Sequential( [ keras.layers.Conv2D( filters=self.num_channels, @@ -140,6 +143,11 @@ def build(self, input_shape): keras.layers.Dropout(rate=self.dropout), ], ) + projection_input_channels = ( + 2 + len(self.dilation_rates) + ) * self.num_channels + projection.build(tuple(input_shape[:-1]) + (projection_input_channels,)) + self.projection = projection def call(self, inputs, training=None): """Calls the Atrous Spatial Pyramid Pooling layer on an input. diff --git a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py index 48e09740f2..9595cd2ee3 100644 --- a/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py +++ b/keras_cv/models/object_detection/yolo_v8/yolo_v8_label_encoder.py @@ -225,7 +225,7 @@ def encode_to_targets( # return zeros if no gt boxes are present return ops.cond( - max_num_boxes > 0, + ops.array(max_num_boxes > 0), lambda: encode_to_targets( pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt ),
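
For context on the SpatialPyramidPooling fix above: the projection now has to be built explicitly, and its input width is the concatenation of every parallel branch. Below is a minimal sketch of that shape bookkeeping, using assumed values for `num_channels` and `dilation_rates` (illustrative only, not the layer's defaults):

```python
# Sketch only: `num_channels` and `dilation_rates` are assumed example values.
num_channels = 256
dilation_rates = [6, 12, 18]

# Branches concatenated before the projection: one 1x1-conv branch,
# one dilated-conv branch per rate, and one global-pooling branch,
# each producing `num_channels` feature maps.
branch_count = 1 + len(dilation_rates) + 1
projection_input_channels = branch_count * num_channels

# Same expression as in SpatialPyramidPooling.build above.
assert projection_input_channels == (2 + len(dilation_rates)) * num_channels
print(projection_input_channels)  # 1280 for these example values
```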