From 737eecbc69ed96a653651c421daa2818b1fc0ca9 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 12:25:00 +0530
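Subject: [PATCH 01/28] Add MobileNetV3 backbone and image classifier

Add MobileNetV3Backbone and MobileNetV3ImageClassifier under
keras_nlp/src/models/mobilenet_v3, export both from keras_nlp.api.models,
and add a unit test module for each.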

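---
Review notes (not part of the commit message):
  * mobilenet_v3_backbone.py: __init__ declares `input_shape` but reads
    `input_image_shape`, which is undefined; the backbone test passes
    `input_image_shape` while the classifier test passes `input_shape`.
  * mobilenet_v3_backbone.py: apply_inverted_res_block calls
    `utils.correct_pad_downsample`, but `utils` is never imported
    (reached whenever stride == 2).
  * mobilenet_v3_backbone.py: SqueezeAndExcite2D is a plain function yet
    reads `self.squeeze_activation`, `self.filters` and
    `self.excite_activation`.
  * mobilenet_v3_image_classifier.py: Dense is applied directly to the
    backbone output with no pooling, while the classifier test expects
    a (2, 2) output and the backbone test expects (2, 7, 7, 1024).
  * MobileNetV3Backbone docstring gives the `input_shape` default as
    (None, None, 3); the signature default is (224, 224, 3).
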
 keras_nlp/api/models/__init__.py              |   4 +
 keras_nlp/src/models/mobilenet_v3/__init__.py |  13 +
 .../mobilenet_v3/mobilenet_v3_backbone.py     | 357 ++++++++++++++++++
 .../mobilenet_v3_backbone_test.py             |  52 +++
 .../mobilenet_v3_image_classifier.py          | 111 ++++++
 .../mobilenet_v3_image_classifier_test.py     |  67 ++++
 6 files changed, 604 insertions(+)
 create mode 100644 keras_nlp/src/models/mobilenet_v3/__init__.py
 create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
 create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
 create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
 create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py

diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py
index 6f7e08c520..5b957ee548 100644
--- a/keras_nlp/api/models/__init__.py
+++ b/keras_nlp/api/models/__init__.py
@@ -165,6 +165,10 @@
     MistralPreprocessor,
 )
 from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
+    MobileNetV3ImageClassifier,
+)
 from keras_nlp.src.models.opt.opt_backbone import OPTBackbone
 from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
 from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import (
diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet_v3/__init__.py
new file mode 100644
index 0000000000..2351a1b7b4
--- /dev/null
+++ b/keras_nlp/src/models/mobilenet_v3/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
\ No newline at end of file
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
new file mode 100644
index 0000000000..f85e6efa67
--- /dev/null
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -0,0 +1,357 @@
+import keras
+from keras import ops
+from keras_nlp.src.api_export import keras_nlp_export
+from keras_nlp.src.models.backbone import Backbone
+
+
+CHANNEL_AXIS = -1
+BN_EPSILON = 1e-3
+BN_MOMENTUM = 0.999
+
+
+@keras_cv_export("keras_nlp.models.MobileNetV3Backbone")
+class MobileNetV3Backbone(Backbone):
+    """Instantiates the MobileNetV3 architecture.
+
+    References:
+        - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf)
+        (ICCV 2019)
+        - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py)
+
+    For transfer learning use cases, make sure to read the
+    [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
+
+    Args:
+        stackwise_expansion: list of ints or floats, the expansion ratio for
+            each inverted residual block in the model.
+        stackwise_filters: list of ints, number of filters for each inverted
+            residual block in the model.
+        stackwise_stride: list of ints, stride length for each inverted
+            residual block in the model.
+        include_rescaling: bool, whether to rescale the inputs. If set to True,
+            inputs will be passed through a `Rescaling(scale=1 / 255)`
+            layer.
+        input_shape: optional shape tuple, defaults to (None, None, 3).
+        alpha: float, controls the width of the network. This is known as the
+            depth multiplier in the MobileNetV3 paper, but the name is kept for
+            consistency with MobileNetV1 in Keras.
+            - If `alpha` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `alpha` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `alpha` = 1, default number of filters from the paper
+                are used at each layer.
+
+    Example:
+    ```python
+    input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Randomly initialized backbone with a custom config
+    model = MobileNetV3Backbone(
+        stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+        stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
+        stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
+        stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+        stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
+        stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+        include_rescaling=False,
+    )
+    output = model(input_data)
+    ```
+    """  # noqa: E501
+
+    def __init__(
+        self,
+        *,
+        stackwise_expansion,
+        stackwise_filters,
+        stackwise_kernel_size,
+        stackwise_stride,
+        stackwise_se_ratio,
+        stackwise_activation,
+        include_rescaling,
+        input_shape=(224, 224, 3),
+        alpha=1.0,
+        **kwargs,
+    ):
+        inputs = keras.layers.Input(shape=input_image_shape)
+        x = inputs
+
+        if include_rescaling:
+            x = keras.layers.Rescaling(scale=1 / 255)(x)
+
+        x = keras.layers.Conv2D(
+            16,
+            kernel_size=3,
+            strides=(2, 2),
+            padding="same",
+            use_bias=False,
+            name="Conv",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=CHANNEL_AXIS,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name="Conv_BatchNorm",
+        )(x)
+        x = apply_hard_swish(x)
+
+        for stack_index in range(len(stackwise_filters)):
+            
+            x = apply_inverted_res_block(
+                x,
+                expansion=stackwise_expansion[stack_index],
+                filters=adjust_channels(
+                    (stackwise_filters[stack_index]) * alpha
+                ),
+                kernel_size=stackwise_kernel_size[stack_index],
+                stride=stackwise_stride[stack_index],
+                se_ratio=stackwise_se_ratio[stack_index],
+                activation=stackwise_activation[stack_index],
+                expansion_index=stack_index,
+            )
+        
+        last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
+
+        x = keras.layers.Conv2D(
+            last_conv_ch,
+            kernel_size=1,
+            padding="same",
+            use_bias=False,
+            name="Conv_1",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=CHANNEL_AXIS,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name="Conv_1_BatchNorm",
+        )(x)
+        x = apply_hard_swish(x)
+
+        super().__init__(inputs=inputs, outputs=x, **kwargs)
+
+        self.stackwise_expansion = stackwise_expansion
+        self.stackwise_filters = stackwise_filters
+        self.stackwise_kernel_size = stackwise_kernel_size
+        self.stackwise_stride = stackwise_stride
+        self.stackwise_se_ratio = stackwise_se_ratio
+        self.stackwise_activation = stackwise_activation
+        self.include_rescaling = include_rescaling
+        self.alpha = alpha
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "stackwise_expansion": self.stackwise_expansion,
+                "stackwise_filters": self.stackwise_filters,
+                "stackwise_kernel_size": self.stackwise_kernel_size,
+                "stackwise_stride": self.stackwise_stride,
+                "stackwise_se_ratio": self.stackwise_se_ratio,
+                "stackwise_activation": self.stackwise_activation,
+                "include_rescaling": self.include_rescaling,
+                "input_shape": self.input_shape[1:],
+                "alpha": self.alpha,
+            }
+        )
+        return config
+
+
+class HardSigmoidActivation(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+
+    def call(self, x):
+        return apply_hard_sigmoid(x)
+
+    def get_config(self):
+        return super().get_config()
+
+
+def adjust_channels(x, divisor=8, min_value=None):
+    """Ensure that all layers have a channel number divisible by the `divisor`.
+
+    Args:
+        x: integer, input value.
+        divisor: integer, the value by which a channel number should be
+            divisible, defaults to 8.
+        min_value: float, optional minimum value for the new tensor. If None,
+            defaults to value of divisor.
+
+    Returns:
+        the updated input scalar.
+    """
+
+    if min_value is None:
+        min_value = divisor
+
+    new_x = max(min_value, int(x + divisor / 2) // divisor * divisor)
+
+    # make sure that round down does not go down by more than 10%.
+    if new_x < 0.9 * x:
+        new_x += divisor
+    return new_x
+
+
+def apply_hard_sigmoid(x):
+    activation = keras.layers.ReLU(6.0)
+    return activation(x + 3.0) * (1.0 / 6.0)
+
+
+def apply_hard_swish(x):
+    return keras.layers.Multiply()([x, apply_hard_sigmoid(x)])
+
+
+def apply_inverted_res_block(
+    x,
+    expansion,
+    filters,
+    kernel_size,
+    stride,
+    se_ratio,
+    activation,
+    expansion_index,
+):
+    """An Inverted Residual Block.
+
+    Args:
+        x: input tensor.
+        expansion: integer, the expansion ratio, multiplied with infilters to
+            get the minimum value passed to adjust_channels.
+        filters: integer, number of filters for convolution layer.
+        kernel_size: integer, the kernel size for DepthWise Convolutions.
+        stride: integer, the stride length for DepthWise Convolutions.
+        se_ratio: float, ratio for bottleneck filters. Number of bottleneck
+            filters = filters * se_ratio.
+        activation: the activation layer to use.
+        expansion_index: integer, a unique identification if you want to use
+            expanded convolutions. If greater than 0, an additional Conv+BN
+            layer is added after the expanded convolutional layer.
+
+    Returns:
+        the updated input tensor.
+    """
+    if isinstance(activation, str):
+        if activation == "hard_swish":
+            activation = apply_hard_swish
+        else:
+            activation = keras.activations.get(activation)
+
+    shortcut = x
+    prefix = "expanded_conv_"
+    infilters = x.shape[CHANNEL_AXIS]
+
+    if expansion_index > 0:
+        prefix = f"expanded_conv_{expansion_index}_"
+
+        x = keras.layers.Conv2D(
+            adjust_channels(infilters * expansion),
+            kernel_size=1,
+            padding="same",
+            use_bias=False,
+            name=prefix + "expand",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=CHANNEL_AXIS,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name=prefix + "expand_BatchNorm",
+        )(x)
+        x = activation(x)
+
+    if stride == 2:
+        x = keras.layers.ZeroPadding2D(
+            padding=utils.correct_pad_downsample(x, kernel_size),
+            name=prefix + "depthwise_pad",
+        )(x)
+
+    x = keras.layers.DepthwiseConv2D(
+        kernel_size,
+        strides=stride,
+        padding="same" if stride == 1 else "valid",
+        use_bias=False,
+        name=prefix + "depthwise",
+    )(x)
+    x = keras.layers.BatchNormalization(
+        axis=CHANNEL_AXIS,
+        epsilon=BN_EPSILON,
+        momentum=BN_MOMENTUM,
+        name=prefix + "depthwise_BatchNorm",
+    )(x)
+    x = activation(x)
+
+    if se_ratio:
+        se_filters = adjust_channels(infilters * expansion)
+        x = SqueezeAndExcite2D(
+            x,
+            se_filters,
+            adjust_channels(se_filters * se_ratio),
+            "relu",
+            HardSigmoidActivation(),
+        )
+
+    x = keras.layers.Conv2D(
+        filters,
+        kernel_size=1,
+        padding="same",
+        use_bias=False,
+        name=prefix + "project",
+    )(x)
+    x = keras.layers.BatchNormalization(
+        axis=CHANNEL_AXIS,
+        epsilon=BN_EPSILON,
+        momentum=BN_MOMENTUM,
+        name=prefix + "project_BatchNorm",
+    )(x)
+
+    if stride == 1 and infilters == filters:
+        x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
+
+    return x
+
+def SqueezeAndExcite2D(
+    input,
+    filters,
+    bottleneck_filters=None,
+    squeeze_activation="relu",
+    excite_activation="sigmoid",
+):
+    """
+    Args:
+        filters: Number of input and output filters. The number of input and
+            output filters is same.
+        bottleneck_filters: (Optional) Number of bottleneck filters. Defaults
+            to `0.25 * filters`
+        squeeze_activation: (Optional) String, callable (or
+            keras.layers.Layer) or keras.activations.Activation instance
+            denoting activation to be applied after squeeze convolution.
+            Defaults to `relu`.
+        excite_activation: (Optional) String, callable (or
+            keras.layers.Layer) or keras.activations.Activation instance
+            denoting activation to be applied after excite convolution.
+            Defaults to `sigmoid`.
+    Example:
+
+    ```python
+    # (...)
+    input = tf.ones((1, 5, 5, 16), dtype=tf.float32)
+    x = keras.layers.Conv2D(16, (3, 3))(input)
+    
+    # (...)
+    ```
+    """
+    if not bottleneck_filters:
+        bottleneck_filters = (filters // 4)
+
+    x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
+    x = keras.layers.Conv2D(
+            bottleneck_filters,
+            (1, 1),
+            activation=self.squeeze_activation,
+    )(x)
+    x = keras.layers.Conv2D(
+            self.filters, (1, 1), activation=self.excite_activation
+    )(x)
+
+    x = ops.multiply(x, input)
+    return x
\ No newline at end of file
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
new file mode 100644
index 0000000000..d106891ddf
--- /dev/null
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -0,0 +1,52 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.tests.test_case import TestCase
+
+
+class MobileNetV3BackboneTest(TestCase):
+    def setUp(self):
+        self.init_kwargs = {
+            "stackwise_expansion": [1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+            "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
+            "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
+            "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+            "stackwise_se_ratio": [0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
+            "stackwise_activation": ["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+            "include_rescaling": False,
+            "input_image_shape": (224, 224, 3),
+            "alpha": 1
+        }
+        self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
+
+    def test_backbone_basics(self):
+        self.run_backbone_test(
+            cls=MobileNetV3Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 7, 7, 1024),
+            run_mixed_precision_check=False,
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=MobileNetV3Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
\ No newline at end of file
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
new file mode 100644
index 0000000000..83fafe616e
--- /dev/null
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
@@ -0,0 +1,111 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import keras
+
+from keras_nlp.src.api_export import keras_nlp_export
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.image_classifier import ImageClassifier
+
+
+@keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier")
+class MobileNetV3ImageClassifier(ImageClassifier):
+    """MobileNetV3 image classifier task model.
+
+    To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
+    where `x` is a tensor and `y` is a integer from `[0, num_classes)`.
+    All `ImageClassifier` tasks include a `from_preset()` constructor which can
+    be used to load a pre-trained config and weights.
+
+    Args:
+        backbone: A `keras_nlp.models.MobileNetV3Backbone` instance.
+        num_classes: int. The number of classes to predict.
+        activation: `None`, str or callable. The activation function to use on
+            the `Dense` layer. Set `activation=None` to return the output
+            logits. Defaults to `"softmax"`.
+
+    Examples:
+
+    Call `predict()` to run inference.
+    ```python
+    # Load preset and train
+    images = np.ones((2, 224, 224, 3), dtype="float32")
+    classifier = keras_nlp.models.MobileNetV3ImageClassifier.from_preset(
+        "mobilenet_v3_small_imagenet")
+    classifier.predict(images)
+    ```
+\
+    Custom backbone.
+    ```python
+    images = np.ones((2, 224, 224, 3), dtype="float32")
+    labels = [0, 3]
+    model = MobileNetV3Backbone(
+        stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+        stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
+        stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
+        stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+        stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
+        stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+        include_rescaling=False,
+    )
+    classifier = keras_nlp.models.MobileNetV3ImageClassifier(
+        backbone=backbone,
+        num_classes=4,
+    )
+    classifier.fit(x=images, y=labels, batch_size=2)
+    ```
+    """
+
+    backbone_cls = MobileNetV3Backbone
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        activation="softmax",
+        preprocessor=None,  # adding this dummy arg for saved model test
+        # TODO: once preprocessor flow is figured out, this needs to be updated
+        **kwargs,
+    ):
+        # === Layers ===
+        self.backbone = backbone
+        self.output_dense = keras.layers.Dense(
+            num_classes,
+            activation=activation,
+            name="predictions",
+        )
+
+        # === Functional Model ===
+        inputs = self.backbone.input
+        x = self.backbone(inputs)
+        outputs = self.output_dense(x)
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.num_classes = num_classes
+        self.activation = activation
+
+    def get_config(self):
+        # Backbone serialized in `super`
+        config = super().get_config()
+        config.update(
+            {
+                "num_classes": self.num_classes,
+                "activation": self.activation,
+            }
+        )
+        return config
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
new file mode 100644
index 0000000000..27752775b6
--- /dev/null
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
@@ -0,0 +1,67 @@
+# Copyright 2023 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pytest
+
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
+    MobileNetV3ImageClassifier,
+)
+from keras_nlp.src.tests.test_case import TestCase
+
+
+class MobileNetV3ImageClassifierTest(TestCase):
+    def setUp(self):
+        # Setup model.
+        self.images = np.ones((2, 224, 224, 3), dtype="float32")
+        self.labels = [0, 3]
+        self.backbone = MobileNetV3Backbone(
+
+            stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+            stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
+            stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
+            stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+            stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
+            stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+            include_rescaling=False,
+            input_shape=(224, 224, 3),
+        )
+        self.init_kwargs = {
+            "backbone": self.backbone,
+            "num_classes": 2,
+            "activation": "softmax",
+        }
+        self.train_data = (
+            self.images,
+            self.labels,
+        )
+
+    def test_classifier_basics(self):
+        pytest.skip(
+            reason="TODO: enable after preprocessor flow is figured out"
+        )
+        self.run_task_test(
+            cls=MobileNetV3ImageClassifier,
+            init_kwargs=self.init_kwargs,
+            train_data=self.train_data,
+            expected_output_shape=(2, 2),
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=MobileNetV3ImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
\ No newline at end of file

From 65cc1f8973c2b364fbd23dfff5fd5677a03ff603 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 12:36:59 +0530
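Subject: [PATCH 02/28] Fix export decorator name in mobilenet_v3_backbone

The module imports only keras_nlp_export, so decorating
MobileNetV3Backbone with keras_cv_export referenced an undefined name.
Use keras_nlp_export instead.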

---
 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index f85e6efa67..6726733493 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -9,7 +9,7 @@
 BN_MOMENTUM = 0.999
 
 
-@keras_cv_export("keras_nlp.models.MobileNetV3Backbone")
+@keras_nlp_export("keras_nlp.models.MobileNetV3Backbone")
 class MobileNetV3Backbone(Backbone):
     """Instantiates the MobileNetV3 architecture.
 

From d66cb9a916d3b5923aa39612e3f7e4dcdb903e35 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 13:52:59 +0530
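Subject: [PATCH 03/28] Reformat MobileNetV3 sources and tests

Wrap long imports and list literals, clear whitespace-only lines, drop
redundant parentheses, and add missing end-of-file newlines.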

---
 keras_nlp/api/models/__init__.py              |  4 +-
 .../mobilenet_v3/mobilenet_v3_backbone.py     | 19 +++----
 .../mobilenet_v3_backbone_test.py             | 50 ++++++++++++++++---
 .../mobilenet_v3_image_classifier.py          |  4 +-
 .../mobilenet_v3_image_classifier_test.py     | 49 +++++++++++++++---
 5 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py
index 5b957ee548..f8dd88a688 100644
--- a/keras_nlp/api/models/__init__.py
+++ b/keras_nlp/api/models/__init__.py
@@ -165,7 +165,9 @@
     MistralPreprocessor,
 )
 from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
+    MobileNetV3Backbone,
+)
 from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
     MobileNetV3ImageClassifier,
 )
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index 6726733493..e170bdbb22 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -97,7 +97,7 @@ def __init__(
         x = apply_hard_swish(x)
 
         for stack_index in range(len(stackwise_filters)):
-            
+
             x = apply_inverted_res_block(
                 x,
                 expansion=stackwise_expansion[stack_index],
@@ -110,7 +110,7 @@ def __init__(
                 activation=stackwise_activation[stack_index],
                 expansion_index=stack_index,
             )
-        
+
         last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
 
         x = keras.layers.Conv2D(
@@ -309,6 +309,7 @@ def apply_inverted_res_block(
 
     return x
 
+
 def SqueezeAndExcite2D(
     input,
     filters,
@@ -336,22 +337,22 @@ def SqueezeAndExcite2D(
     # (...)
     input = tf.ones((1, 5, 5, 16), dtype=tf.float32)
     x = keras.layers.Conv2D(16, (3, 3))(input)
-    
+
     # (...)
     ```
     """
     if not bottleneck_filters:
-        bottleneck_filters = (filters // 4)
+        bottleneck_filters = filters // 4
 
     x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
     x = keras.layers.Conv2D(
-            bottleneck_filters,
-            (1, 1),
-            activation=self.squeeze_activation,
+        bottleneck_filters,
+        (1, 1),
+        activation=self.squeeze_activation,
     )(x)
     x = keras.layers.Conv2D(
-            self.filters, (1, 1), activation=self.excite_activation
+        self.filters, (1, 1), activation=self.excite_activation
     )(x)
 
     x = ops.multiply(x, input)
-    return x
\ No newline at end of file
+    return x
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
index d106891ddf..e34790424a 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -15,22 +15,60 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
+    MobileNetV3Backbone,
+)
 from keras_nlp.src.tests.test_case import TestCase
 
 
 class MobileNetV3BackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
-            "stackwise_expansion": [1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+            "stackwise_expansion": [
+                1,
+                72.0 / 16,
+                88.0 / 24,
+                4,
+                6,
+                6,
+                3,
+                3,
+                6,
+                6,
+                6,
+            ],
             "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
             "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
             "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
-            "stackwise_se_ratio": [0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
-            "stackwise_activation": ["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+            "stackwise_se_ratio": [
+                0.25,
+                None,
+                None,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+            ],
+            "stackwise_activation": [
+                "relu",
+                "relu",
+                "relu",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+            ],
             "include_rescaling": False,
             "input_image_shape": (224, 224, 3),
-            "alpha": 1
+            "alpha": 1,
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
 
@@ -49,4 +87,4 @@ def test_saved_model(self):
             cls=MobileNetV3Backbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
-        )
\ No newline at end of file
+        )
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
index 83fafe616e..0d2b91d740 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
@@ -14,8 +14,10 @@
 import keras
 
 from keras_nlp.src.api_export import keras_nlp_export
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
 from keras_nlp.src.models.image_classifier import ImageClassifier
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
+    MobileNetV3Backbone,
+)
 
 
 @keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier")
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
index 27752775b6..f98abf6fb2 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
@@ -14,7 +14,9 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone
+from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
+    MobileNetV3Backbone,
+)
 from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
     MobileNetV3ImageClassifier,
 )
@@ -27,13 +29,48 @@ def setUp(self):
         self.images = np.ones((2, 224, 224, 3), dtype="float32")
         self.labels = [0, 3]
         self.backbone = MobileNetV3Backbone(
-
-            stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
+            stackwise_expansion=[
+                1,
+                72.0 / 16,
+                88.0 / 24,
+                4,
+                6,
+                6,
+                3,
+                3,
+                6,
+                6,
+                6,
+            ],
             stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
             stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
             stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
-            stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
-            stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+            stackwise_se_ratio=[
+                0.25,
+                None,
+                None,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+                0.25,
+            ],
+            stackwise_activation=[
+                "relu",
+                "relu",
+                "relu",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+                "hard_swish",
+            ],
             include_rescaling=False,
             input_shape=(224, 224, 3),
         )
@@ -64,4 +101,4 @@ def test_saved_model(self):
             cls=MobileNetV3ImageClassifier,
             init_kwargs=self.init_kwargs,
             input_data=self.images,
-        )
\ No newline at end of file
+        )

From 8821e0cb6f20f7615d7abd294e620e4db0acfafe Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 13:57:29 +0530
Subject: [PATCH 04/28] refactoring backbone

---
 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index e170bdbb22..a60afe5a18 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -74,7 +74,7 @@ def __init__(
         alpha=1.0,
         **kwargs,
     ):
-        inputs = keras.layers.Input(shape=input_image_shape)
+        inputs = keras.layers.Input(shape=input_shape)
         x = inputs
 
         if include_rescaling:

From 189f268590d46efddc924ed16f516049481b0bf6 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 14:29:29 +0530
Subject: [PATCH 05/28] correct_pad_downsample method added

---
 .../mobilenet_v3/mobilenet_v3_backbone.py     | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index a60afe5a18..7ff2b7fb69 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -261,7 +261,7 @@ def apply_inverted_res_block(
 
     if stride == 2:
         x = keras.layers.ZeroPadding2D(
-            padding=utils.correct_pad_downsample(x, kernel_size),
+            padding=correct_pad_downsample(x, kernel_size),
             name=prefix + "depthwise_pad",
         )(x)
 
@@ -356,3 +356,28 @@ def SqueezeAndExcite2D(
 
     x = ops.multiply(x, input)
     return x
+
+
+def correct_pad_downsample(inputs, kernel_size):
+    """Returns zero-padding for a 2D convolution with downsampling.
+
+    Args:
+        inputs: Input tensor; `inputs.shape[1:3]` gives the two spatial sizes.
+        kernel_size: An integer or tuple/list of 2 integers.
+
+    Returns:
+        A `((before, after), (before, after))` tuple, one pair per spatial dim.
+    """
+    img_dim = 1  # index of the first spatial size in `inputs.shape`
+    input_size = inputs.shape[img_dim : (img_dim + 2)]
+    if isinstance(kernel_size, int):
+        kernel_size = (kernel_size, kernel_size)
+    if any(dim is None for dim in input_size):  # avoid `None % 2` below
+        adjust = (1, 1)
+    else:
+        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
+    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
+    return (
+        (correct[0] - adjust[0], correct[0]),
+        (correct[1] - adjust[1], correct[1]),
+    )

From 709beaf88ca98667f20440d8483cb6a8753a0006 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 14:48:42 +0530
Subject: [PATCH 06/28] refactoring backbone

---
 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index 7ff2b7fb69..e4ec7d41dd 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -348,10 +348,10 @@ def SqueezeAndExcite2D(
     x = keras.layers.Conv2D(
         bottleneck_filters,
         (1, 1),
-        activation=self.squeeze_activation,
+        activation=squeeze_activation,
     )(x)
     x = keras.layers.Conv2D(
-        self.filters, (1, 1), activation=self.excite_activation
+        filters, (1, 1), activation=excite_activation
     )(x)
 
     x = ops.multiply(x, input)

From 4f04438806882059f06aaaa5a18c4a6cee23f935 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 15:11:05 +0530
Subject: [PATCH 07/28] parameters updated

---
 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py   | 5 +----
 .../src/models/mobilenet_v3/mobilenet_v3_backbone_test.py    | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index e4ec7d41dd..1f0f0d4340 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -3,7 +3,6 @@
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.backbone import Backbone
 
-
 CHANNEL_AXIS = -1
 BN_EPSILON = 1e-3
 BN_MOMENTUM = 0.999
@@ -350,9 +349,7 @@ def SqueezeAndExcite2D(
         (1, 1),
         activation=squeeze_activation,
     )(x)
-    x = keras.layers.Conv2D(
-        filters, (1, 1), activation=excite_activation
-    )(x)
+    x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x)
 
     x = ops.multiply(x, input)
     return x
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
index e34790424a..ecc3af6a62 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -67,7 +67,7 @@ def setUp(self):
                 "hard_swish",
             ],
             "include_rescaling": False,
-            "input_image_shape": (224, 224, 3),
+            "input_shape": (224, 224, 3),
             "alpha": 1,
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")

From 9f6af774186168977a79a1e739e1f2a5705e03bb Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 15:44:56 +0530
Subject: [PATCH 08/28] Test case updated, expected output shape corrected

---
 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
index ecc3af6a62..0343db5b42 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -77,7 +77,7 @@ def test_backbone_basics(self):
             cls=MobileNetV3Backbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
-            expected_output_shape=(2, 7, 7, 1024),
+            expected_output_shape=(2, 7, 7, 576),
             run_mixed_precision_check=False,
         )
 

From d590dfadb79bd336420a8a9968ebbf26d606cf1b Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 19 Aug 2024 16:30:33 +0530
Subject: [PATCH 09/28] code formatted with black

---
 keras_nlp/src/models/mobilenet_v3/__init__.py      |  2 +-
 .../models/mobilenet_v3/mobilenet_v3_backbone.py   | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet_v3/__init__.py
index 2351a1b7b4..3364a6bd16 100644
--- a/keras_nlp/src/models/mobilenet_v3/__init__.py
+++ b/keras_nlp/src/models/mobilenet_v3/__init__.py
@@ -10,4 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index 1f0f0d4340..ae76abbd24 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -1,5 +1,19 @@
+# Copyright 2024 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import keras
 from keras import ops
+
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.backbone import Backbone
 

From b26c318fb3781748a9300a459766b32673266c11 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Tue, 20 Aug 2024 12:02:46 +0530
Subject: [PATCH 10/28] testcase updated

---
 .../mobilenet_v3_backbone_test.py             | 30 ++-----------------
 .../mobilenet_v3_image_classifier.py          |  2 +-
 .../mobilenet_v3_image_classifier_test.py     | 30 ++-----------------
 3 files changed, 7 insertions(+), 55 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
index 0343db5b42..cbef4f2845 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -26,44 +26,20 @@ def setUp(self):
         self.init_kwargs = {
             "stackwise_expansion": [
                 1,
-                72.0 / 16,
-                88.0 / 24,
                 4,
                 6,
-                6,
-                3,
-                3,
-                6,
-                6,
-                6,
             ],
-            "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
-            "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
-            "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+            "stackwise_filters": [4, 8, 16],
+            "stackwise_kernel_size": [3, 3, 5],
+            "stackwise_stride": [2, 2, 1],
             "stackwise_se_ratio": [
                 0.25,
                 None,
-                None,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
                 0.25,
             ],
             "stackwise_activation": [
                 "relu",
                 "relu",
-                "relu",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
                 "hard_swish",
             ],
             "include_rescaling": False,
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
index 0d2b91d740..a7b674ce67 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
@@ -46,7 +46,7 @@ class MobileNetV3ImageClassifier(ImageClassifier):
         "mobilenet_v3_small_imagenet")
     classifier.predict(images)
     ```
-\
+
     Custom backbone.
     ```python
     images = np.ones((2, 224, 224, 3), dtype="float32")
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
index f98abf6fb2..e9500ca853 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
@@ -31,44 +31,20 @@ def setUp(self):
         self.backbone = MobileNetV3Backbone(
             stackwise_expansion=[
                 1,
-                72.0 / 16,
-                88.0 / 24,
                 4,
                 6,
-                6,
-                3,
-                3,
-                6,
-                6,
-                6,
             ],
-            stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
-            stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
-            stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+            stackwise_filters=[4, 8, 16],
+            stackwise_kernel_size=[3, 3, 5],
+            stackwise_stride=[2, 2, 1],
             stackwise_se_ratio=[
                 0.25,
                 None,
-                None,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
-                0.25,
                 0.25,
             ],
             stackwise_activation=[
                 "relu",
                 "relu",
-                "relu",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
-                "hard_swish",
                 "hard_swish",
             ],
             include_rescaling=False,

From 1ed96a9920ae371d35390479b410219d58a767e1 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Tue, 20 Aug 2024 12:34:16 +0530
Subject: [PATCH 11/28] refactoring and description added

---
 .../mobilenet_v3/mobilenet_v3_backbone.py     | 26 ++++++++++++-------
 .../mobilenet_v3_backbone_test.py             | 20 +++-----------
 .../mobilenet_v3_image_classifier.py          | 14 +++++-----
 3 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index ae76abbd24..c803fab02b 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -60,22 +60,22 @@ class MobileNetV3Backbone(Backbone):
     input_data = tf.ones(shape=(8, 224, 224, 3))
 
     # Randomly initialized backbone with a custom config
+
     model = MobileNetV3Backbone(
-        stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
-        stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
-        stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
-        stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
-        stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
-        stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
-        include_rescaling=False,
+        "stackwise_expansion": [1, 4, 6],
+        "stackwise_filters"= [4, 8, 16],
+        "stackwise_kernel_size"= [3, 3, 5],
+        "stackwise_stride"= [2, 2, 1],
+        "stackwise_se_ratio"= [ 0.25, None, 0.25],
+        "stackwise_activation"= ["relu", "relu", "hard_swish"],
+        "include_rescaling"= False,
     )
     output = model(input_data)
     ```
-    """  # noqa: E501
+    """
 
     def __init__(
         self,
-        *,
         stackwise_expansion,
         stackwise_filters,
         stackwise_kernel_size,
@@ -87,6 +87,7 @@ def __init__(
         alpha=1.0,
         **kwargs,
     ):
+        # === Functional Model ===
         inputs = keras.layers.Input(shape=input_shape)
         x = inputs
 
@@ -143,6 +144,7 @@ def __init__(
 
         super().__init__(inputs=inputs, outputs=x, **kwargs)
 
+        # === Config ===
         self.stackwise_expansion = stackwise_expansion
         self.stackwise_filters = stackwise_filters
         self.stackwise_kernel_size = stackwise_kernel_size
@@ -331,6 +333,12 @@ def SqueezeAndExcite2D(
     excite_activation="sigmoid",
 ):
     """
+    Description:
+        This layer applies a content-aware mechanism to adaptively assign
+        channel-wise weights. It uses global average pooling to compress
+        feature maps into single values, which are then processed by
+        two 1x1 Conv2D layers: the first reduces the dimensionality, and
+        the second restores it.
     Args:
         filters: Number of input and output filters. The number of input and
             output filters is same.
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
index cbef4f2845..295a7014e6 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
@@ -24,24 +24,12 @@
 class MobileNetV3BackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
-            "stackwise_expansion": [
-                1,
-                4,
-                6,
-            ],
+            "stackwise_expansion": [1, 4, 6],
             "stackwise_filters": [4, 8, 16],
             "stackwise_kernel_size": [3, 3, 5],
             "stackwise_stride": [2, 2, 1],
-            "stackwise_se_ratio": [
-                0.25,
-                None,
-                0.25,
-            ],
-            "stackwise_activation": [
-                "relu",
-                "relu",
-                "hard_swish",
-            ],
+            "stackwise_se_ratio": [0.25, None, 0.25],
+            "stackwise_activation": ["relu", "relu", "hard_swish"],
             "include_rescaling": False,
             "input_shape": (224, 224, 3),
             "alpha": 1,
@@ -53,7 +41,7 @@ def test_backbone_basics(self):
             cls=MobileNetV3Backbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
-            expected_output_shape=(2, 7, 7, 576),
+            expected_output_shape=(2, 28, 28, 96),
             run_mixed_precision_check=False,
         )
 
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
index a7b674ce67..2d8faa83d0 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
@@ -52,13 +52,13 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     images = np.ones((2, 224, 224, 3), dtype="float32")
     labels = [0, 3]
     model = MobileNetV3Backbone(
-        stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6],
-        stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96],
-        stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5],
-        stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
-        stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
-        stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"],
-        include_rescaling=False,
+        "stackwise_expansion": [1, 4, 6],
+        "stackwise_filters"= [4, 8, 16],
+        "stackwise_kernel_size"= [3, 3, 5],
+        "stackwise_stride"= [2, 2, 1],
+        "stackwise_se_ratio"= [ 0.25, None, 0.25],
+        "stackwise_activation"= ["relu", "relu", "hard_swish"],
+        "include_rescaling"= False,
     )
     classifier = keras_nlp.models.MobileNetV3ImageClassifier(
         backbone=backbone,

From dd2554d53b8c9a3edbb614680800fb5b58b3b62a Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Tue, 20 Aug 2024 18:51:59 +0530
Subject: [PATCH 12/28] comments updated

---
 .../models/mobilenet_v3/mobilenet_v3_backbone.py   | 14 +++++++-------
 .../mobilenet_v3/mobilenet_v3_image_classifier.py  | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
index c803fab02b..4dc532b5c3 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
@@ -62,13 +62,13 @@ class MobileNetV3Backbone(Backbone):
     # Randomly initialized backbone with a custom config
 
     model = MobileNetV3Backbone(
-        "stackwise_expansion": [1, 4, 6],
-        "stackwise_filters"= [4, 8, 16],
-        "stackwise_kernel_size"= [3, 3, 5],
-        "stackwise_stride"= [2, 2, 1],
-        "stackwise_se_ratio"= [ 0.25, None, 0.25],
-        "stackwise_activation"= ["relu", "relu", "hard_swish"],
-        "include_rescaling"= False,
+        stackwise_expansion = [1, 4, 6],
+        stackwise_filters = [4, 8, 16],
+        stackwise_kernel_size = [3, 3, 5],
+        stackwise_stride = [2, 2, 1],
+        stackwise_se_ratio = [ 0.25, None, 0.25],
+        stackwise_activation = ["relu", "relu", "hard_swish"],
+        include_rescaling = False,
     )
     output = model(input_data)
     ```
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
index 2d8faa83d0..77e677ce76 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
@@ -52,13 +52,13 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     images = np.ones((2, 224, 224, 3), dtype="float32")
     labels = [0, 3]
     model = MobileNetV3Backbone(
-        "stackwise_expansion": [1, 4, 6],
-        "stackwise_filters"= [4, 8, 16],
-        "stackwise_kernel_size"= [3, 3, 5],
-        "stackwise_stride"= [2, 2, 1],
-        "stackwise_se_ratio"= [ 0.25, None, 0.25],
-        "stackwise_activation"= ["relu", "relu", "hard_swish"],
-        "include_rescaling"= False,
+        stackwise_expansion = [1, 4, 6],
+        stackwise_filters = [4, 8, 16],
+        stackwise_kernel_size = [3, 3, 5],
+        stackwise_stride = [2, 2, 1],
+        stackwise_se_ratio = [ 0.25, None, 0.25],
+        stackwise_activation = ["relu", "relu", "hard_swish"],
+        include_rescaling = False,
     )
     classifier = keras_nlp.models.MobileNetV3ImageClassifier(
         backbone=backbone,

From 59cf9e3c15ad51fd1b2558a08e7d0d56a0e57384 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 02:52:38 +0530
Subject: [PATCH 13/28] added mobilenet v1 and v2

---
 keras_nlp/api/models/__init__.py              |   8 +-
 .../{mobilenet_v3 => mobilenet}/__init__.py   |   0
 .../mobilenet_backbone.py}                    | 112 +++++++++++++-----
 .../mobilenet_backbone_test.py}               |  11 +-
 .../mobilenet_image_classifier.py}            |  19 ++-
 .../mobilenet_image_classifier_test.py}       |  17 ++-
 6 files changed, 108 insertions(+), 59 deletions(-)
 rename keras_nlp/src/models/{mobilenet_v3 => mobilenet}/__init__.py (100%)
 rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_backbone.py => mobilenet/mobilenet_backbone.py} (79%)
 rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_backbone_test.py => mobilenet/mobilenet_backbone_test.py} (88%)
 rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_image_classifier.py => mobilenet/mobilenet_image_classifier.py} (86%)
 rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_image_classifier_test.py => mobilenet/mobilenet_image_classifier_test.py} (84%)

diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py
index f8dd88a688..c5ca6e3409 100644
--- a/keras_nlp/api/models/__init__.py
+++ b/keras_nlp/api/models/__init__.py
@@ -165,11 +165,9 @@
     MistralPreprocessor,
 )
 from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
-    MobileNetV3Backbone,
-)
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
-    MobileNetV3ImageClassifier,
+from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
+from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import (
+    MobileNetImageClassifier,
 )
 from keras_nlp.src.models.opt.opt_backbone import OPTBackbone
 from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet/__init__.py
similarity index 100%
rename from keras_nlp/src/models/mobilenet_v3/__init__.py
rename to keras_nlp/src/models/mobilenet/__init__.py
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
similarity index 79%
rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
rename to keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 4dc532b5c3..b052c1eeda 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -22,11 +22,17 @@
 BN_MOMENTUM = 0.999
 
 
-@keras_nlp_export("keras_nlp.models.MobileNetV3Backbone")
-class MobileNetV3Backbone(Backbone):
-    """Instantiates the MobileNetV3 architecture.
+@keras_nlp_export("keras_nlp.models.MobileNetBackbone")
+class MobileNetBackbone(Backbone):
+    """Instantiates the MobileNet architecture.
 
     References:
+        - [MobileNets: Efficient Convolutional Neural Networks
+       for Mobile Vision Applications](
+        https://arxiv.org/abs/1704.04861)
+        - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
+        https://arxiv.org/abs/1801.04381) (CVPR 2018)
+        - [Based on the Original keras.applications MobileNetv2](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v2.py)
         - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf)
         (ICCV 2019)
         - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py)
@@ -46,7 +52,7 @@ class MobileNetV3Backbone(Backbone):
             layer.
         input_shape: optional shape tuple, defaults to (None, None, 3).
         alpha: float, controls the width of the network. This is known as the
-            depth multiplier in the MobileNetV3 paper, but the name is kept for
+            depth multiplier in the MobileNet paper, but the name is kept for
             consistency with MobileNetV1 in Keras.
             - If `alpha` < 1.0, proportionally decreases the number
                 of filters in each layer.
@@ -54,6 +60,7 @@ class MobileNetV3Backbone(Backbone):
                 of filters in each layer.
             - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
+        version: string, one of "v1", "v2" or "v3". Defaults to "v3".
 
     Example:
     ```python
@@ -61,7 +68,7 @@ class MobileNetV3Backbone(Backbone):
 
     # Randomly initialized backbone with a custom config
 
-    model = MobileNetV3Backbone(
+    model = MobileNetBackbone(
         stackwise_expansion = [1, 4, 6],
         stackwise_filters = [4, 8, 16],
         stackwise_kernel_size = [3, 3, 5],
@@ -69,6 +76,7 @@ class MobileNetV3Backbone(Backbone):
         stackwise_se_ratio = [ 0.25, None, 0.25],
         stackwise_activation = ["relu", "relu", "hard_swish"],
         include_rescaling = False,
+        version = 'v3'
     )
     output = model(input_data)
     ```
@@ -85,17 +93,32 @@ def __init__(
         include_rescaling,
         input_shape=(224, 224, 3),
         alpha=1.0,
+        version="v3",
         **kwargs,
     ):
         # === Functional Model ===
+        if version not in ["v1", "v2", "v3"]:
+            raise ValueError(
+                "The `version` argument should be either `v1` (for MobileNet)"
+                "or `v2` ( for MobileNetV2)"
+                "or v3 (MobileNetV3), default version is `v3`"
+                f"Received `version={version}`"
+            )
         inputs = keras.layers.Input(shape=input_shape)
         x = inputs
 
         if include_rescaling:
             x = keras.layers.Rescaling(scale=1 / 255)(x)
 
+        first_ch = (
+            32
+            if version == "v1"
+            else (
+                adjust_channels(32 * alpha) if version == "v2" else 16
+            )  # This is for 'v3'
+        )
         x = keras.layers.Conv2D(
-            16,
+            first_ch,
             kernel_size=3,
             strides=(2, 2),
             padding="same",
@@ -108,7 +131,11 @@ def __init__(
             momentum=BN_MOMENTUM,
             name="Conv_BatchNorm",
         )(x)
-        x = apply_hard_swish(x)
+
+        if version == "v3":
+            x = apply_hard_swish(x)
+        else:
+            x = keras.layers.ReLU(6.0)(x)
 
         for stack_index in range(len(stackwise_filters)):
 
@@ -120,27 +147,41 @@ def __init__(
                 ),
                 kernel_size=stackwise_kernel_size[stack_index],
                 stride=stackwise_stride[stack_index],
-                se_ratio=stackwise_se_ratio[stack_index],
+                se_ratio=(
+                    stackwise_se_ratio[stack_index] if version == "v3" else 0
+                ),
                 activation=stackwise_activation[stack_index],
-                expansion_index=stack_index,
+                expansion_index=0 if version == "v1" else stack_index,
+                version=version,
             )
 
-        last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
-
-        x = keras.layers.Conv2D(
-            last_conv_ch,
-            kernel_size=1,
-            padding="same",
-            use_bias=False,
-            name="Conv_1",
-        )(x)
-        x = keras.layers.BatchNormalization(
-            axis=CHANNEL_AXIS,
-            epsilon=BN_EPSILON,
-            momentum=BN_MOMENTUM,
-            name="Conv_1_BatchNorm",
-        )(x)
-        x = apply_hard_swish(x)
+        if version == "v3":
+            last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
+        elif version == "v2":
+            if alpha > 1.0:
+                last_conv_ch = adjust_channels(1280 * alpha)
+            else:
+                last_conv_ch = 1280
+
+        if version != "v1":
+            x = keras.layers.Conv2D(
+                last_conv_ch,
+                kernel_size=1,
+                padding="same",
+                use_bias=False,
+                name="Conv_1",
+            )(x)
+            x = keras.layers.BatchNormalization(
+                axis=CHANNEL_AXIS,
+                epsilon=BN_EPSILON,
+                momentum=BN_MOMENTUM,
+                name="Conv_1_BatchNorm",
+            )(x)
+
+            if version == "v3":
+                x = apply_hard_swish(x)
+            else:
+                x = keras.layers.ReLU(6.0)(x)
 
         super().__init__(inputs=inputs, outputs=x, **kwargs)
 
@@ -153,6 +194,7 @@ def __init__(
         self.stackwise_activation = stackwise_activation
         self.include_rescaling = include_rescaling
         self.alpha = alpha
+        self.version = version
 
     def get_config(self):
         config = super().get_config()
@@ -167,6 +209,7 @@ def get_config(self):
                 "include_rescaling": self.include_rescaling,
                 "input_shape": self.input_shape[1:],
                 "alpha": self.alpha,
+                "version": self.version,
             }
         )
         return config
@@ -226,6 +269,7 @@ def apply_inverted_res_block(
     se_ratio,
     activation,
     expansion_index,
+    version="v3",
 ):
     """An Inverted Residual Block.
 
@@ -242,6 +286,7 @@ def apply_inverted_res_block(
         expansion_index: integer, a unique identification if you want to use
             expanded convolutions. If greater than 0, an additional Conv+BN
             layer is added after the expanded convolutional layer.
+        version: MobileNet architecture version, v1, v2 or v3
 
     Returns:
         the updated input tensor.
@@ -272,7 +317,10 @@ def apply_inverted_res_block(
             momentum=BN_MOMENTUM,
             name=prefix + "expand_BatchNorm",
         )(x)
-        x = activation(x)
+        if version == "v3":
+            x = activation(x)
+        else:
+            x = keras.layers.ReLU(6.0)(x)
 
     if stride == 2:
         x = keras.layers.ZeroPadding2D(
@@ -293,7 +341,10 @@ def apply_inverted_res_block(
         momentum=BN_MOMENTUM,
         name=prefix + "depthwise_BatchNorm",
     )(x)
-    x = activation(x)
+    if version == "v3":
+        x = activation(x)
+    else:
+        x = keras.layers.ReLU(6.0)(x)
 
     if se_ratio:
         se_filters = adjust_channels(infilters * expansion)
@@ -319,8 +370,11 @@ def apply_inverted_res_block(
         name=prefix + "project_BatchNorm",
     )(x)
 
-    if stride == 1 and infilters == filters:
-        x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
+    if version == "v1":
+        x = keras.layers.ReLU(6.0)(x)
+    else:
+        if stride == 1 and infilters == filters:
+            x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
 
     return x
 
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
similarity index 88%
rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
rename to keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 295a7014e6..68671c6b3e 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -15,13 +15,11 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
-    MobileNetV3Backbone,
-)
+from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
 from keras_nlp.src.tests.test_case import TestCase
 
 
-class MobileNetV3BackboneTest(TestCase):
+class MobileNetBackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
             "stackwise_expansion": [1, 4, 6],
@@ -33,12 +31,13 @@ def setUp(self):
             "include_rescaling": False,
             "input_shape": (224, 224, 3),
             "alpha": 1,
+            "version": "v3",
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
 
     def test_backbone_basics(self):
         self.run_backbone_test(
-            cls=MobileNetV3Backbone,
+            cls=MobileNetBackbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
             expected_output_shape=(2, 28, 28, 96),
@@ -48,7 +47,7 @@ def test_backbone_basics(self):
     @pytest.mark.large
     def test_saved_model(self):
         self.run_model_saving_test(
-            cls=MobileNetV3Backbone,
+            cls=MobileNetBackbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
similarity index 86%
rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
rename to keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
index 77e677ce76..ed6239ee26 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
@@ -15,13 +15,11 @@
 
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.image_classifier import ImageClassifier
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
-    MobileNetV3Backbone,
-)
+from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
 
 
-@keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier")
-class MobileNetV3ImageClassifier(ImageClassifier):
+@keras_nlp_export("keras_nlp.models.MobileNetImageClassifier")
+class MobileNetImageClassifier(ImageClassifier):
     """MobileNetV3 image classifier task model.
 
     To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
@@ -30,7 +28,7 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     be used to load a pre-trained config and weights.
 
     Args:
-        backbone: A `keras_nlp.models.MobileNetV3Backbone` instance.
+        backbone: A `keras_nlp.models.MobileNetBackbone` instance.
         num_classes: int. The number of classes to predict.
         activation: `None`, str or callable. The activation function to use on
             the `Dense` layer. Set `activation=None` to return the output
@@ -42,7 +40,7 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     ```python
     # Load preset and train
     images = np.ones((2, 224, 224, 3), dtype="float32")
-    classifier = keras_nlp.models.MobileNetV3ImageClassifier.from_preset(
+    classifier = keras_nlp.models.MobileNetImageClassifier.from_preset(
         "mobilenet_v3_small_imagenet")
     classifier.predict(images)
     ```
@@ -51,7 +49,7 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     ```python
     images = np.ones((2, 224, 224, 3), dtype="float32")
     labels = [0, 3]
-    model = MobileNetV3Backbone(
+    model = MobileNetBackbone(
         stackwise_expansion = [1, 4, 6],
         stackwise_filters = [4, 8, 16],
         stackwise_kernel_size = [3, 3, 5],
@@ -59,8 +57,9 @@ class MobileNetV3ImageClassifier(ImageClassifier):
         stackwise_se_ratio = [ 0.25, None, 0.25],
         stackwise_activation = ["relu", "relu", "hard_swish"],
         include_rescaling = False,
+        "version"="v3",
     )
-    classifier = keras_nlp.models.MobileNetV3ImageClassifier(
+    classifier = keras_nlp.models.MobileNetImageClassifier(
         backbone=backbone,
         num_classes=4,
     )
@@ -68,7 +67,7 @@ class MobileNetV3ImageClassifier(ImageClassifier):
     ```
     """
 
-    backbone_cls = MobileNetV3Backbone
+    backbone_cls = MobileNetBackbone
 
     def __init__(
         self,
diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
similarity index 84%
rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
rename to keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index e9500ca853..d556478840 100644
--- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -14,21 +14,19 @@
 import numpy as np
 import pytest
 
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import (
-    MobileNetV3Backbone,
-)
-from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import (
-    MobileNetV3ImageClassifier,
+from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
+from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import (
+    MobileNetImageClassifier,
 )
 from keras_nlp.src.tests.test_case import TestCase
 
 
-class MobileNetV3ImageClassifierTest(TestCase):
+class MobileNetImageClassifierTest(TestCase):
     def setUp(self):
         # Setup model.
         self.images = np.ones((2, 224, 224, 3), dtype="float32")
         self.labels = [0, 3]
-        self.backbone = MobileNetV3Backbone(
+        self.backbone = MobileNetBackbone(
             stackwise_expansion=[
                 1,
                 4,
@@ -49,6 +47,7 @@ def setUp(self):
             ],
             include_rescaling=False,
             input_shape=(224, 224, 3),
+            version="v3",
         )
         self.init_kwargs = {
             "backbone": self.backbone,
@@ -65,7 +64,7 @@ def test_classifier_basics(self):
             reason="TODO: enable after preprocessor flow is figured out"
         )
         self.run_task_test(
-            cls=MobileNetV3ImageClassifier,
+            cls=MobileNetImageClassifier,
             init_kwargs=self.init_kwargs,
             train_data=self.train_data,
             expected_output_shape=(2, 2),
@@ -74,7 +73,7 @@ def test_classifier_basics(self):
     @pytest.mark.large
     def test_saved_model(self):
         self.run_model_saving_test(
-            cls=MobileNetV3ImageClassifier,
+            cls=MobileNetImageClassifier,
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )

From 091752e17a5695d3af6e3108165f6fd11867a2d9 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 03:08:56 +0530
Subject: [PATCH 14/28] merge conflict resolved

---
 keras_nlp/api/models/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py
index de678ea262..17b00c1f05 100644
--- a/keras_nlp/api/models/__init__.py
+++ b/keras_nlp/api/models/__init__.py
@@ -165,16 +165,16 @@
     MistralPreprocessor,
 )
 from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer
-from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
-from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import (
-    MobileNetImageClassifier,
-)
 from keras_nlp.src.models.mix_transformer.mix_transformer_backbone import (
     MiTBackbone,
 )
 from keras_nlp.src.models.mix_transformer.mix_transformer_classifier import (
     MiTImageClassifier,
 )
+from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone
+from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import (
+    MobileNetImageClassifier,
+)
 from keras_nlp.src.models.opt.opt_backbone import OPTBackbone
 from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
 from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import (

From eeecee6edda58eb6fc719b986ed5dc734ec2df97 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 15:02:23 +0530
Subject: [PATCH 15/28] version arg removed, and config options added

---
 .../models/mobilenet/mobilenet_backbone.py    | 265 ++++++++++++------
 .../mobilenet/mobilenet_backbone_test.py      |   7 +-
 .../mobilenet/mobilenet_image_classifier.py   |   4 +-
 .../mobilenet_image_classifier_test.py        |  23 +-
 4 files changed, 195 insertions(+), 104 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index b052c1eeda..ecce30f6a2 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -25,6 +25,12 @@
 @keras_nlp_export("keras_nlp.models.MobileNetBackbone")
 class MobileNetBackbone(Backbone):
     """Instantiates the MobileNet architecture.
+    MobileNet is a lightweight convolutional neural network (CNN)
+    optimized for mobile and edge devices, striking a balance between
+    accuracy and efficiency. By employing depthwise separable convolutions
+    and techniques like Squeeze-and-Excitation (SE) blocks in later versions,
+    MobileNet models are highly suitable for real-time applications on
+    resource-constrained devices.
 
     References:
         - [MobileNets: Efficient Convolutional Neural Networks
@@ -32,13 +38,8 @@ class MobileNetBackbone(Backbone):
         https://arxiv.org/abs/1704.04861)
         - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
         https://arxiv.org/abs/1801.04381) (CVPR 2018)
-        - [Based on the Original keras.applications MobileNetv2](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v2.py)
         - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf)
         (ICCV 2019)
-        - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py)
-
-    For transfer learning use cases, make sure to read the
-    [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/).
 
     Args:
         stackwise_expansion: list of ints or floats, the expansion ratio for
@@ -50,17 +51,25 @@ class MobileNetBackbone(Backbone):
         include_rescaling: bool, whether to rescale the inputs. If set to True,
             inputs will be passed through a `Rescaling(scale=1 / 255)`
             layer.
-        input_shape: optional shape tuple, defaults to (None, None, 3).
-        alpha: float, controls the width of the network. This is known as the
-            depth multiplier in the MobileNet paper, but the name is kept for
-            consistency with MobileNetV1 in Keras.
-            - If `alpha` < 1.0, proportionally decreases the number
+        image_shape: optional shape tuple, defaults to (224, 224, 3).
+        depth_multiplier: float, controls the width of the network.
+            - If `depth_multiplier` < 1.0, proportionally decreases the number
                 of filters in each layer.
-            - If `alpha` > 1.0, proportionally increases the number
+            - If `depth_multiplier` > 1.0, proportionally increases the number
                 of filters in each layer.
-            - If `alpha` = 1, default number of filters from the paper
+            - If `depth_multiplier` = 1, default number of filters from the paper
                 are used at each layer.
-        version: MobileNet version
+        input_filter: number of filters in first convolution layer
+        output_filter: int or None, filters of the final 1x1 conv + batch
+            norm (with "hard_swish", derived from 6x its input channels
+            instead). If None, these layers are omitted, as in MobileNetV1.
+        activation: activation function to be used
+            'hard_swish' for MobileNetV3,
+            'relu6' for MobileNetV1 and MobileNetV2
+        inverted_res_block: whether to use inverted residual blocks or not,
+            'False' for MobileNetV1,
+            'True' for MobileNetV2 and MobileNetV3
+
 
     Example:
     ```python
@@ -74,9 +83,12 @@ class MobileNetBackbone(Backbone):
         stackwise_kernel_size = [3, 3, 5],
         stackwise_stride = [2, 2, 1],
         stackwise_se_ratio = [ 0.25, None, 0.25],
-        stackwise_activation = ["relu", "relu", "hard_swish"],
+        stackwise_activation = ["relu", "relu6", "hard_swish"],
         include_rescaling = False,
-        version = 'v3'
+        output_filter=1280,
+        activation="hard_swish",
+        inverted_res_block=True,
+
     )
     output = model(input_data)
     ```
@@ -91,34 +103,34 @@ def __init__(
         stackwise_se_ratio,
         stackwise_activation,
         include_rescaling,
-        input_shape=(224, 224, 3),
-        alpha=1.0,
-        version="v3",
+        output_filter,
+        activation,
+        inverted_res_block,
+        depth_multiplier=1.0,
+        input_filter=16,
+        image_shape=(224, 224, 3),
         **kwargs,
     ):
+        activation_str = activation
+        if isinstance(activation, str):
+            if activation == "hard_swish":
+                activation = apply_hard_swish
+            elif activation == "relu6":
+                activation = apply_relu6
+            else:
+                activation = keras.activations.get(activation)
+
         # === Functional Model ===
-        if version not in ["v1", "v2", "v3"]:
-            raise ValueError(
-                "The `version` argument should be either `v1` (for MobileNet)"
-                "or `v2` ( for MobileNetV2)"
-                "or v3 (MobileNetV3), default version is `v3`"
-                f"Received `version={version}`"
-            )
-        inputs = keras.layers.Input(shape=input_shape)
+
+        inputs = keras.layers.Input(shape=image_shape)
         x = inputs
 
         if include_rescaling:
             x = keras.layers.Rescaling(scale=1 / 255)(x)
 
-        first_ch = (
-            32
-            if version == "v1"
-            else (
-                adjust_channels(32 * alpha) if version == "v2" else 16
-            )  # This is for 'v3'
-        )
+        input_filter = adjust_channels(input_filter)
         x = keras.layers.Conv2D(
-            first_ch,
+            input_filter,
             kernel_size=3,
             strides=(2, 2),
             padding="same",
@@ -132,38 +144,45 @@ def __init__(
             name="Conv_BatchNorm",
         )(x)
 
-        if version == "v3":
-            x = apply_hard_swish(x)
-        else:
-            x = keras.layers.ReLU(6.0)(x)
+        x = activation(x)
 
         for stack_index in range(len(stackwise_filters)):
 
-            x = apply_inverted_res_block(
-                x,
-                expansion=stackwise_expansion[stack_index],
-                filters=adjust_channels(
-                    (stackwise_filters[stack_index]) * alpha
-                ),
-                kernel_size=stackwise_kernel_size[stack_index],
-                stride=stackwise_stride[stack_index],
-                se_ratio=(
-                    stackwise_se_ratio[stack_index] if version == "v3" else 0
-                ),
-                activation=stackwise_activation[stack_index],
-                expansion_index=0 if version == "v1" else stack_index,
-                version=version,
-            )
-
-        if version == "v3":
-            last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
-        elif version == "v2":
-            if alpha > 1.0:
-                last_conv_ch = adjust_channels(1280 * alpha)
+            if inverted_res_block:
+                x = apply_inverted_res_block(
+                    x,
+                    expansion=stackwise_expansion[stack_index],
+                    filters=adjust_channels(
+                        (stackwise_filters[stack_index]) * depth_multiplier
+                    ),
+                    kernel_size=stackwise_kernel_size[stack_index],
+                    stride=stackwise_stride[stack_index],
+                    se_ratio=(
+                        stackwise_se_ratio[stack_index]
+                        if activation_str == "hard_swish"
+                        else 0
+                    ),
+                    activation=stackwise_activation[stack_index],
+                    expansion_index=stack_index,
+                )
+            else:
+                x = apply_depthwise_conv_block(
+                    x,
+                    filters=adjust_channels(
+                        (stackwise_filters[stack_index]) * depth_multiplier
+                    ),
+                    kernel_size=3,
+                    stride=stackwise_stride[stack_index],
+                    depth_multiplier=depth_multiplier,
+                    block_id=stack_index,
+                )
+
+        if output_filter is not None:
+            if activation_str == "hard_swish":
+                last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
             else:
-                last_conv_ch = 1280
+                last_conv_ch = output_filter
 
-        if version != "v1":
             x = keras.layers.Conv2D(
                 last_conv_ch,
                 kernel_size=1,
@@ -178,10 +197,7 @@ def __init__(
                 name="Conv_1_BatchNorm",
             )(x)
 
-            if version == "v3":
-                x = apply_hard_swish(x)
-            else:
-                x = keras.layers.ReLU(6.0)(x)
+            x = activation(x)
 
         super().__init__(inputs=inputs, outputs=x, **kwargs)
 
@@ -193,8 +209,12 @@ def __init__(
         self.stackwise_se_ratio = stackwise_se_ratio
         self.stackwise_activation = stackwise_activation
         self.include_rescaling = include_rescaling
-        self.alpha = alpha
-        self.version = version
+        self.depth_multiplier = depth_multiplier
+        self.input_filter = input_filter
+        self.output_filter = output_filter
+        self.activation = activation
+        self.inverted_res_block = inverted_res_block
+        self.image_shape = image_shape[1:]
 
     def get_config(self):
         config = super().get_config()
@@ -207,9 +227,12 @@ def get_config(self):
                 "stackwise_se_ratio": self.stackwise_se_ratio,
                 "stackwise_activation": self.stackwise_activation,
                 "include_rescaling": self.include_rescaling,
-                "input_shape": self.input_shape[1:],
-                "alpha": self.alpha,
-                "version": self.version,
+                "image_shape": self.image_shape,
+                "depth_multiplier": self.depth_multiplier,
+                "input_filter": self.input_filter,
+                "output_filter": self.output_filter,
+                "activation": self.activation,
+                "inverted_res_block": self.inverted_res_block,
             }
         )
         return config
@@ -260,6 +283,10 @@ def apply_hard_swish(x):
     return keras.layers.Multiply()([x, apply_hard_sigmoid(x)])
 
 
+def apply_relu6(x):
+    return keras.layers.ReLU(6.0)(x)
+
+
 def apply_inverted_res_block(
     x,
     expansion,
@@ -269,7 +296,6 @@ def apply_inverted_res_block(
     se_ratio,
     activation,
     expansion_index,
-    version="v3",
 ):
     """An Inverted Residual Block.
 
@@ -286,7 +312,6 @@ def apply_inverted_res_block(
         expansion_index: integer, a unique identification if you want to use
             expanded convolutions. If greater than 0, an additional Conv+BN
             layer is added after the expanded convolutional layer.
-        version: MobileNet architecture version, v1, v2 or v3
 
     Returns:
         the updated input tensor.
@@ -294,6 +319,8 @@ def apply_inverted_res_block(
     if isinstance(activation, str):
         if activation == "hard_swish":
             activation = apply_hard_swish
+        elif activation == "relu6":
+            activation = apply_relu6
         else:
             activation = keras.activations.get(activation)
 
@@ -317,10 +344,7 @@ def apply_inverted_res_block(
             momentum=BN_MOMENTUM,
             name=prefix + "expand_BatchNorm",
         )(x)
-        if version == "v3":
-            x = activation(x)
-        else:
-            x = keras.layers.ReLU(6.0)(x)
+        x = activation(x)
 
     if stride == 2:
         x = keras.layers.ZeroPadding2D(
@@ -341,10 +365,7 @@ def apply_inverted_res_block(
         momentum=BN_MOMENTUM,
         name=prefix + "depthwise_BatchNorm",
     )(x)
-    if version == "v3":
-        x = activation(x)
-    else:
-        x = keras.layers.ReLU(6.0)(x)
+    x = activation(x)
 
     if se_ratio:
         se_filters = adjust_channels(infilters * expansion)
@@ -370,11 +391,85 @@ def apply_inverted_res_block(
         name=prefix + "project_BatchNorm",
     )(x)
 
-    if version == "v1":
-        x = keras.layers.ReLU(6.0)(x)
-    else:
-        if stride == 1 and infilters == filters:
-            x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
+    if stride == 1 and infilters == filters:
+        x = keras.layers.Add(name=prefix + "Add")([shortcut, x])
+
+    return x
+
+
+def apply_depthwise_conv_block(
+    x,
+    filters,
+    kernel_size=3,
+    depth_multiplier=1,
+    stride=1,
+    block_id=1,
+):
+    """Adds a depthwise convolution block.
+
+    A depthwise convolution block consists of a depthwise conv,
+    batch normalization, relu6, pointwise convolution,
+    batch normalization and relu6 activation.
+
+    Args:
+        x: Input tensor of shape `(rows, cols, channels)`
+        filters: Integer, the dimensionality of the output space
+            (i.e. the number of output filters in the pointwise convolution).
+        depth_multiplier: controls the width of the network.
+            - If `depth_multiplier` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `depth_multiplier` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `depth_multiplier` = 1, the paper's default filters are used.
+        stride: An integer or tuple/list of 2 integers, specifying the strides
+            of the convolution along the width and height.
+            Can be a single integer to specify the same value for
+            all spatial dimensions. Specifying any stride value != 1 is
+            incompatible with specifying any `dilation_rate` value != 1.
+        block_id: Integer, a unique identification designating the block number.
+
+    Input shape:
+        4D tensor with shape: `(batch, rows, cols, channels)`
+    Returns:
+        Output tensor of block.
+    """
+
+    if stride == 2:
+        x = keras.layers.ZeroPadding2D(
+            padding=correct_pad_downsample(x, kernel_size),
+            name="conv_pad_%d" % block_id,
+        )(x)
+
+    x = keras.layers.DepthwiseConv2D(
+        kernel_size,
+        strides=stride,
+        padding="same" if stride == 1 else "valid",
+        depth_multiplier=depth_multiplier,
+        use_bias=False,
+        name="depthwise_%d" % block_id,
+    )(x)
+    x = keras.layers.BatchNormalization(
+        axis=CHANNEL_AXIS,
+        epsilon=BN_EPSILON,
+        momentum=BN_MOMENTUM,
+        name="depthwise_BatchNorm_%d" % block_id,
+    )(x)
+    x = keras.layers.ReLU(6.0)(x)
+
+    x = keras.layers.Conv2D(
+        filters,
+        kernel_size=1,
+        padding="same",
+        use_bias=False,
+        name="conv_%d" % block_id,
+    )(x)
+    x = keras.layers.BatchNormalization(
+        axis=CHANNEL_AXIS,
+        epsilon=BN_EPSILON,
+        momentum=BN_MOMENTUM,
+        name="BatchNorm_%d" % block_id,
+    )(x)
+    x = keras.layers.ReLU(6.0)(x)
 
     return x
 
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 68671c6b3e..98bac8feee 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -29,9 +29,12 @@ def setUp(self):
             "stackwise_se_ratio": [0.25, None, 0.25],
             "stackwise_activation": ["relu", "relu", "hard_swish"],
             "include_rescaling": False,
+            "output_filter": 1280,
+            "activation": "hard_swish",
+            "inverted_res_block": True,
+            "input_filter": 16,
             "input_shape": (224, 224, 3),
-            "alpha": 1,
-            "version": "v3",
+            "depth_multiplier": 1,
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
 
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
index ed6239ee26..3e08f3482c 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py
@@ -57,7 +57,9 @@ class MobileNetImageClassifier(ImageClassifier):
         stackwise_se_ratio = [ 0.25, None, 0.25],
         stackwise_activation = ["relu", "relu", "hard_swish"],
         include_rescaling = False,
-        "version"="v3",
+        output_filter=1280,
+        activation="hard_swish",
+        inverted_res_block=True,
     )
     classifier = keras_nlp.models.MobileNetImageClassifier(
         backbone=backbone,
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index d556478840..24c9f5065e 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -27,27 +27,18 @@ def setUp(self):
         self.images = np.ones((2, 224, 224, 3), dtype="float32")
         self.labels = [0, 3]
         self.backbone = MobileNetBackbone(
-            stackwise_expansion=[
-                1,
-                4,
-                6,
-            ],
+            stackwise_expansion=[1, 4, 6],
             stackwise_filters=[4, 8, 16],
             stackwise_kernel_size=[3, 3, 5],
             stackwise_stride=[2, 2, 1],
-            stackwise_se_ratio=[
-                0.25,
-                None,
-                0.25,
-            ],
-            stackwise_activation=[
-                "relu",
-                "relu",
-                "hard_swish",
-            ],
+            stackwise_se_ratio=[0.25, None, 0.25],
+            stackwise_activation=["relu", "relu", "hard_swish"],
             include_rescaling=False,
+            output_filter=1280,
+            activation="hard_swish",
+            inverted_res_block=True,
+            input_filter=16,
             input_shape=(224, 224, 3),
-            version="v3",
         )
         self.init_kwargs = {
             "backbone": self.backbone,

From b442f7e145eea230354f1e8bf80259741a1ff127 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 15:23:53 +0530
Subject: [PATCH 16/28] input_shape changed to image_shape in arg

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py       | 2 +-
 .../src/models/mobilenet/mobilenet_image_classifier_test.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 98bac8feee..8b89285729 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -33,7 +33,7 @@ def setUp(self):
             "activation": "hard_swish",
             "inverted_res_block": True,
             "input_filter": 16,
-            "input_shape": (224, 224, 3),
+            "image_shape": (224, 224, 3),
             "depth_multiplier": 1,
         }
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index 24c9f5065e..88b71f44a4 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -38,7 +38,7 @@ def setUp(self):
             activation="hard_swish",
             inverted_res_block=True,
             input_filter=16,
-            input_shape=(224, 224, 3),
+            image_shape=(224, 224, 3),
         )
         self.init_kwargs = {
             "backbone": self.backbone,

From 5e967314ea2324bc502ba25c3986ebb7eeda0d70 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 15:57:19 +0530
Subject: [PATCH 17/28] config updated

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index ecce30f6a2..d137cd1b40 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -212,7 +212,7 @@ def __init__(
         self.depth_multiplier = depth_multiplier
         self.input_filter = input_filter
         self.output_filter = output_filter
-        self.activation = activation
+        self.activation = activation_str
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape[1:]
 

From 3108c7e28a9c845ae27ff639c65bed88582b1105 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Wed, 21 Aug 2024 16:38:48 +0530
Subject: [PATCH 18/28] input shape corrected

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index d137cd1b40..fdd791bfed 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -214,7 +214,7 @@ def __init__(
         self.output_filter = output_filter
         self.activation = activation_str
         self.inverted_res_block = inverted_res_block
-        self.image_shape = image_shape[1:]
+        self.image_shape = image_shape
 
     def get_config(self):
         config = super().get_config()

From 066b6ab3e3a559cdfff4f24a0c8e029ccd7044d5 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Thu, 22 Aug 2024 16:16:41 +0530
Subject: [PATCH 19/28] comments resolved

---
 .../models/mobilenet/mobilenet_backbone.py    | 107 ++++++------------
 .../mobilenet/mobilenet_backbone_test.py      |   5 +-
 .../mobilenet_image_classifier_test.py        |   5 +-
 3 files changed, 38 insertions(+), 79 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index fdd791bfed..2892d2ca02 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -25,6 +25,7 @@
 @keras_nlp_export("keras_nlp.models.MobileNetBackbone")
 class MobileNetBackbone(Backbone):
     """Instantiates the MobileNet architecture.
+
     MobileNet is a lightweight convolutional neural network (CNN)
     optimized for mobile and edge devices, striking a balance between
     accuracy and efficiency. By employing depthwise separable convolutions
@@ -51,7 +52,7 @@ class MobileNetBackbone(Backbone):
         include_rescaling: bool, whether to rescale the inputs. If set to True,
             inputs will be passed through a `Rescaling(scale=1 / 255)`
             layer.
-        image_shape: optional shape tuple, defaults to (None, None, 3).
+        image_shape: optional shape tuple, defaults to (224, 224, 3).
         depth_multiplier: float, controls the width of the network.
             - If `depth_multiplier` < 1.0, proportionally decreases the number
                 of filters in each layer.
@@ -76,15 +77,14 @@ class MobileNetBackbone(Backbone):
     input_data = tf.ones(shape=(8, 224, 224, 3))
 
     # Randomly initialized backbone with a custom config
-
     model = MobileNetBackbone(
-        stackwise_expansion = [1, 4, 6],
-        stackwise_filters = [4, 8, 16],
-        stackwise_kernel_size = [3, 3, 5],
-        stackwise_stride = [2, 2, 1],
-        stackwise_se_ratio = [ 0.25, None, 0.25],
-        stackwise_activation = ["relu", "relu6", "hard_swish"],
-        include_rescaling = False,
+        stackwise_expansion=[1, 4, 6],
+        stackwise_filters=[4, 8, 16],
+        stackwise_kernel_size=[3, 3, 5],
+        stackwise_stride=[2, 2, 1],
+        stackwise_se_ratio=[0.25, None, 0.25],
+        stackwise_activation=["relu", "relu6", "hard_swish"],
+        include_rescaling=False,
         output_filter=1280,
         activation="hard_swish",
         inverted_res_block=True,
@@ -104,21 +104,16 @@ def __init__(
         stackwise_activation,
         include_rescaling,
         output_filter,
-        activation,
         inverted_res_block,
+        activation=keras.activations.hard_swish,
         depth_multiplier=1.0,
-        input_filter=16,
+        input_filters=16,
         image_shape=(224, 224, 3),
         **kwargs,
     ):
         activation_str = activation
-        if isinstance(activation, str):
-            if activation == "hard_swish":
-                activation = apply_hard_swish
-            elif activation == "relu6":
-                activation = apply_relu6
-            else:
-                activation = keras.activations.get(activation)
+
+        activation = keras.activations.get(activation)
 
         # === Functional Model ===
 
@@ -128,20 +123,20 @@ def __init__(
         if include_rescaling:
             x = keras.layers.Rescaling(scale=1 / 255)(x)
 
-        input_filter = adjust_channels(input_filter)
+        input_filters = adjust_channels(input_filters)
         x = keras.layers.Conv2D(
-            input_filter,
+            input_filters,
             kernel_size=3,
             strides=(2, 2),
             padding="same",
             use_bias=False,
-            name="Conv",
+            name="input_conv",
         )(x)
         x = keras.layers.BatchNormalization(
             axis=CHANNEL_AXIS,
             epsilon=BN_EPSILON,
             momentum=BN_MOMENTUM,
-            name="Conv_BatchNorm",
+            name="input_batch_norm",
         )(x)
 
         x = activation(x)
@@ -157,11 +152,7 @@ def __init__(
                     ),
                     kernel_size=stackwise_kernel_size[stack_index],
                     stride=stackwise_stride[stack_index],
-                    se_ratio=(
-                        stackwise_se_ratio[stack_index]
-                        if activation_str == "hard_swish"
-                        else 0
-                    ),
+                    se_ratio=(stackwise_se_ratio[stack_index]),
                     activation=stackwise_activation[stack_index],
                     expansion_index=stack_index,
                 )
@@ -178,23 +169,20 @@ def __init__(
                 )
 
         if output_filter is not None:
-            if activation_str == "hard_swish":
-                last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
-            else:
-                last_conv_ch = output_filter
+            last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
 
             x = keras.layers.Conv2D(
                 last_conv_ch,
                 kernel_size=1,
                 padding="same",
                 use_bias=False,
-                name="Conv_1",
+                name="output_conv",
             )(x)
             x = keras.layers.BatchNormalization(
                 axis=CHANNEL_AXIS,
                 epsilon=BN_EPSILON,
                 momentum=BN_MOMENTUM,
-                name="Conv_1_BatchNorm",
+                name="output_batch_norm",
             )(x)
 
             x = activation(x)
@@ -210,7 +198,7 @@ def __init__(
         self.stackwise_activation = stackwise_activation
         self.include_rescaling = include_rescaling
         self.depth_multiplier = depth_multiplier
-        self.input_filter = input_filter
+        self.input_filters = input_filters
         self.output_filter = output_filter
         self.activation = activation_str
         self.inverted_res_block = inverted_res_block
@@ -229,7 +217,7 @@ def get_config(self):
                 "include_rescaling": self.include_rescaling,
                 "image_shape": self.image_shape,
                 "depth_multiplier": self.depth_multiplier,
-                "input_filter": self.input_filter,
+                "input_filters": self.input_filters,
                 "output_filter": self.output_filter,
                 "activation": self.activation,
                 "inverted_res_block": self.inverted_res_block,
@@ -238,12 +226,12 @@ def get_config(self):
         return config
 
 
-class HardSigmoidActivation(keras.layers.Layer):
+class HardSigmoidActivation:
     def __init__(self):
         super().__init__()
 
     def call(self, x):
-        return apply_hard_sigmoid(x)
+        return keras.activations.hard_sigmoid(x)
 
     def get_config(self):
         return super().get_config()
@@ -274,19 +262,6 @@ def adjust_channels(x, divisor=8, min_value=None):
     return new_x
 
 
-def apply_hard_sigmoid(x):
-    activation = keras.layers.ReLU(6.0)
-    return activation(x + 3.0) * (1.0 / 6.0)
-
-
-def apply_hard_swish(x):
-    return keras.layers.Multiply()([x, apply_hard_sigmoid(x)])
-
-
-def apply_relu6(x):
-    return keras.layers.ReLU(6.0)(x)
-
-
 def apply_inverted_res_block(
     x,
     expansion,
@@ -316,14 +291,7 @@ def apply_inverted_res_block(
     Returns:
         the updated input tensor.
     """
-    if isinstance(activation, str):
-        if activation == "hard_swish":
-            activation = apply_hard_swish
-        elif activation == "relu6":
-            activation = apply_relu6
-        else:
-            activation = keras.activations.get(activation)
-
+    activation = keras.activations.get(activation)
     shortcut = x
     prefix = "expanded_conv_"
     infilters = x.shape[CHANNEL_AXIS]
@@ -370,11 +338,11 @@ def apply_inverted_res_block(
     if se_ratio:
         se_filters = adjust_channels(infilters * expansion)
         x = SqueezeAndExcite2D(
-            x,
-            se_filters,
-            adjust_channels(se_filters * se_ratio),
-            "relu",
-            HardSigmoidActivation(),
+            input=x,
+            filters=se_filters,
+            bottleneck_filters=adjust_channels(se_filters * se_ratio),
+            squeeze_activation="relu",
+            excite_activation=HardSigmoidActivation(),
         )
 
     x = keras.layers.Conv2D(
@@ -469,9 +437,7 @@ def apply_depthwise_conv_block(
         momentum=BN_MOMENTUM,
         name="BatchNorm_%d" % block_id,
     )(x)
-    x = keras.layers.ReLU(6.0)(x)
-
-    return x
+    return keras.layers.ReLU(6.0)(x)
 
 
 def SqueezeAndExcite2D(
@@ -501,15 +467,6 @@ def SqueezeAndExcite2D(
             keras.layers.Layer) or keras.activations.Activation instance
             denoting activation to be applied after excite convolution.
             Defaults to `sigmoid`.
-    Example:
-
-    ```python
-    # (...)
-    input = tf.ones((1, 5, 5, 16), dtype=tf.float32)
-    x = keras.layers.Conv2D(16, (3, 3))(input)
-
-    # (...)
-    ```
     """
     if not bottleneck_filters:
         bottleneck_filters = filters // 4
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 8b89285729..c5b3790366 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import keras
 import numpy as np
 import pytest
 
@@ -30,9 +31,9 @@ def setUp(self):
             "stackwise_activation": ["relu", "relu", "hard_swish"],
             "include_rescaling": False,
             "output_filter": 1280,
-            "activation": "hard_swish",
+            "activation": keras.activations.hard_swish,
             "inverted_res_block": True,
-            "input_filter": 16,
+            "input_filters": 16,
             "image_shape": (224, 224, 3),
             "depth_multiplier": 1,
         }
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index 88b71f44a4..0a561056e4 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import keras
 import numpy as np
 import pytest
 
@@ -35,9 +36,9 @@ def setUp(self):
             stackwise_activation=["relu", "relu", "hard_swish"],
             include_rescaling=False,
             output_filter=1280,
-            activation="hard_swish",
+            activation=keras.activations.hard_swish,
             inverted_res_block=True,
-            input_filter=16,
+            input_filters=16,
             image_shape=(224, 224, 3),
         )
         self.init_kwargs = {

From 70b5f9eb68387681f9e37aab44a79a081f49a478 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Thu, 22 Aug 2024 20:41:30 +0530
Subject: [PATCH 20/28] activation function format changed

---
 .../src/models/mobilenet/mobilenet_backbone.py   | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 2892d2ca02..8bf9255aaf 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -86,7 +86,7 @@ class MobileNetBackbone(Backbone):
         stackwise_activation=["relu", "relu6", "hard_swish"],
         include_rescaling=False,
         output_filter=1280,
-        activation="hard_swish",
+        activation=keras.activations.hard_swish,
         inverted_res_block=True,
 
     )
@@ -111,10 +111,6 @@ def __init__(
         image_shape=(224, 224, 3),
         **kwargs,
     ):
-        activation_str = activation
-
-        activation = keras.activations.get(activation)
-
         # === Functional Model ===
 
         inputs = keras.layers.Input(shape=image_shape)
@@ -139,7 +135,9 @@ def __init__(
             name="input_batch_norm",
         )(x)
 
-        x = activation(x)
+        x = keras.layers.Activation(
+            activation,
+        )(x)
 
         for stack_index in range(len(stackwise_filters)):
 
@@ -200,7 +198,7 @@ def __init__(
         self.depth_multiplier = depth_multiplier
         self.input_filters = input_filters
         self.output_filter = output_filter
-        self.activation = activation_str
+        self.activation = keras.activations.serialize(activation=activation)
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape
 
@@ -312,7 +310,7 @@ def apply_inverted_res_block(
             momentum=BN_MOMENTUM,
             name=prefix + "expand_BatchNorm",
         )(x)
-        x = activation(x)
+        x = keras.layers.Activation(activation=activation)(x)
 
     if stride == 2:
         x = keras.layers.ZeroPadding2D(
@@ -333,7 +331,7 @@ def apply_inverted_res_block(
         momentum=BN_MOMENTUM,
         name=prefix + "depthwise_BatchNorm",
     )(x)
-    x = activation(x)
+    x = keras.layers.Activation(activation=activation)(x)
 
     if se_ratio:
         se_filters = adjust_channels(infilters * expansion)

From 2f8ba292fe06893b25221f90ad2c204b25c07d88 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Thu, 22 Aug 2024 21:09:30 +0530
Subject: [PATCH 21/28] minor bug fixed

---
 .../models/mobilenet/mobilenet_backbone.py    | 22 +++++--------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 8bf9255aaf..820a3295de 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -182,8 +182,7 @@ def __init__(
                 momentum=BN_MOMENTUM,
                 name="output_batch_norm",
             )(x)
-
-            x = activation(x)
+            x = keras.layers.Activation(activation)(x)
 
         super().__init__(inputs=inputs, outputs=x, **kwargs)
 
@@ -198,7 +197,7 @@ def __init__(
         self.depth_multiplier = depth_multiplier
         self.input_filters = input_filters
         self.output_filter = output_filter
-        self.activation = keras.activations.serialize(activation=activation)
+        self.activation = keras.activations.get(activation=activation)
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape
 
@@ -217,24 +216,15 @@ def get_config(self):
                 "depth_multiplier": self.depth_multiplier,
                 "input_filters": self.input_filters,
                 "output_filter": self.output_filter,
-                "activation": self.activation,
+                "activation": keras.activations.serialize(
+                    activation=self.activation
+                ),
                 "inverted_res_block": self.inverted_res_block,
             }
         )
         return config
 
 
-class HardSigmoidActivation:
-    def __init__(self):
-        super().__init__()
-
-    def call(self, x):
-        return keras.activations.hard_sigmoid(x)
-
-    def get_config(self):
-        return super().get_config()
-
-
 def adjust_channels(x, divisor=8, min_value=None):
     """Ensure that all layers have a channel number divisible by the `divisor`.
 
@@ -340,7 +330,7 @@ def apply_inverted_res_block(
             filters=se_filters,
             bottleneck_filters=adjust_channels(se_filters * se_ratio),
             squeeze_activation="relu",
-            excite_activation=HardSigmoidActivation(),
+            excite_activation=keras.activations.hard_sigmoid,
         )
 
     x = keras.layers.Conv2D(

From 67efa000320ae60981c71ac7d5f04a7c66b97c5b Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Thu, 22 Aug 2024 21:52:37 +0530
Subject: [PATCH 22/28] minor bug fixed

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 820a3295de..2e73883470 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -197,7 +197,7 @@ def __init__(
         self.depth_multiplier = depth_multiplier
         self.input_filters = input_filters
         self.output_filter = output_filter
-        self.activation = keras.activations.get(activation=activation)
+        self.activation = keras.activations.get(activation)
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape
 

From 83abfc68feb6bb724c0646c8df25253fd6dc31fb Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Fri, 23 Aug 2024 12:19:02 +0530
Subject: [PATCH 23/28] added vision_backbone_test

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone.py      | 3 +++
 keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 2e73883470..513e1f88f4 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -112,6 +112,9 @@ def __init__(
         **kwargs,
     ):
         # === Functional Model ===
+        CHANNEL_AXIS = (
+            1 if keras.config.image_data_format == "channels_first" else -1
+        )
 
         inputs = keras.layers.Input(shape=image_shape)
         x = inputs
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index c5b3790366..07bbc6984f 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -40,7 +40,7 @@ def setUp(self):
         self.input_data = np.ones((2, 224, 224, 3), dtype="float32")
 
     def test_backbone_basics(self):
-        self.run_backbone_test(
+        self.run_vision_backbone_test(
             cls=MobileNetBackbone,
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,

From 8d0e6dcbe3b69165034f62eceea34d46b9222053 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Fri, 23 Aug 2024 12:43:38 +0530
Subject: [PATCH 24/28] channel_first bug resolved

---
 keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 513e1f88f4..5903610170 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -463,13 +463,16 @@ def SqueezeAndExcite2D(
         bottleneck_filters = filters // 4
 
     x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
+    if CHANNEL_AXIS == 1:
+        x = keras.layers.Permute((2, 3, 1))(x)
     x = keras.layers.Conv2D(
         bottleneck_filters,
         (1, 1),
         activation=squeeze_activation,
     )(x)
     x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x)
-
+    if CHANNEL_AXIS == 1:
+        x = keras.layers.Permute((3, 1, 2))(x)
     x = ops.multiply(x, input)
     return x
 

From 0af04abbbd95ab643f972d50b2b7bb8563f1bc09 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Fri, 23 Aug 2024 16:43:01 +0530
Subject: [PATCH 25/28] channel_first cases working

---
 .../models/mobilenet/mobilenet_backbone.py    | 35 +++++++++++++------
 .../mobilenet/mobilenet_backbone_test.py      |  1 +
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index 5903610170..a9b8503945 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -17,7 +17,7 @@
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.backbone import Backbone
 
-CHANNEL_AXIS = -1
+# CHANNEL_AXIS = -1 if keras.config.image_data_format() == "channels_last" else 1
 BN_EPSILON = 1e-3
 BN_MOMENTUM = 0.999
 
@@ -105,15 +105,15 @@ def __init__(
         include_rescaling,
         output_filter,
         inverted_res_block,
+        image_shape=(224, 224, 3),
         activation=keras.activations.hard_swish,
         depth_multiplier=1.0,
         input_filters=16,
-        image_shape=(224, 224, 3),
         **kwargs,
     ):
         # === Functional Model ===
         CHANNEL_AXIS = (
-            1 if keras.config.image_data_format == "channels_first" else -1
+            -1 if keras.config.image_data_format() == "channels_last" else 1
         )
 
         inputs = keras.layers.Input(shape=image_shape)
@@ -128,6 +128,7 @@ def __init__(
             kernel_size=3,
             strides=(2, 2),
             padding="same",
+            data_format=keras.config.image_data_format(),
             use_bias=False,
             name="input_conv",
         )(x)
@@ -137,7 +138,6 @@ def __init__(
             momentum=BN_MOMENTUM,
             name="input_batch_norm",
         )(x)
-
         x = keras.layers.Activation(
             activation,
         )(x)
@@ -176,6 +176,7 @@ def __init__(
                 last_conv_ch,
                 kernel_size=1,
                 padding="same",
+                data_format=keras.config.image_data_format(),
                 use_bias=False,
                 name="output_conv",
             )(x)
@@ -282,6 +283,9 @@ def apply_inverted_res_block(
     Returns:
         the updated input tensor.
     """
+    CHANNEL_AXIS = (
+        -1 if keras.config.image_data_format() == "channels_last" else 1
+    )
     activation = keras.activations.get(activation)
     shortcut = x
     prefix = "expanded_conv_"
@@ -294,6 +298,7 @@ def apply_inverted_res_block(
             adjust_channels(infilters * expansion),
             kernel_size=1,
             padding="same",
+            data_format=keras.config.image_data_format(),
             use_bias=False,
             name=prefix + "expand",
         )(x)
@@ -315,6 +320,7 @@ def apply_inverted_res_block(
         kernel_size,
         strides=stride,
         padding="same" if stride == 1 else "valid",
+        data_format=keras.config.image_data_format(),
         use_bias=False,
         name=prefix + "depthwise",
     )(x)
@@ -340,6 +346,7 @@ def apply_inverted_res_block(
         filters,
         kernel_size=1,
         padding="same",
+        data_format=keras.config.image_data_format(),
         use_bias=False,
         name=prefix + "project",
     )(x)
@@ -392,7 +399,9 @@ def apply_depthwise_conv_block(
     Returns:
         Output tensor of block.
     """
-
+    CHANNEL_AXIS = (
+        -1 if keras.config.image_data_format() == "channels_last" else 1
+    )
     if stride == 2:
         x = keras.layers.ZeroPadding2D(
             padding=correct_pad_downsample(x, kernel_size),
@@ -403,6 +412,7 @@ def apply_depthwise_conv_block(
         kernel_size,
         strides=stride,
         padding="same" if stride == 1 else "valid",
+        data_format=keras.config.image_data_format(),
         depth_multiplier=depth_multiplier,
         use_bias=False,
         name="depthwise_%d" % block_id,
@@ -419,6 +429,7 @@ def apply_depthwise_conv_block(
         filters,
         kernel_size=1,
         padding="same",
+        data_format=keras.config.image_data_format(),
         use_bias=False,
         name="conv_%d" % block_id,
     )(x)
@@ -463,16 +474,20 @@ def SqueezeAndExcite2D(
         bottleneck_filters = filters // 4
 
     x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input)
-    if CHANNEL_AXIS == 1:
-        x = keras.layers.Permute((2, 3, 1))(x)
+
     x = keras.layers.Conv2D(
         bottleneck_filters,
         (1, 1),
+        data_format=keras.config.image_data_format(),
         activation=squeeze_activation,
     )(x)
-    x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x)
-    if CHANNEL_AXIS == 1:
-        x = keras.layers.Permute((3, 1, 2))(x)
+    x = keras.layers.Conv2D(
+        filters,
+        (1, 1),
+        data_format=keras.config.image_data_format(),
+        activation=excite_activation,
+    )(x)
+
     x = ops.multiply(x, input)
     return x
 
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 07bbc6984f..542c1cc785 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -46,6 +46,7 @@ def test_backbone_basics(self):
             input_data=self.input_data,
             expected_output_shape=(2, 28, 28, 96),
             run_mixed_precision_check=False,
+            run_data_format_check=False,
         )
 
     @pytest.mark.large

From 27e1759b5cb0fe663b0267fdbdaac5935f973e3c Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Sat, 24 Aug 2024 12:18:33 +0530
Subject: [PATCH 26/28] comments resolved

---
 .../models/mobilenet/mobilenet_backbone.py    | 78 ++++++++++---------
 .../mobilenet/mobilenet_backbone_test.py      |  2 +-
 .../mobilenet_image_classifier_test.py        |  4 +-
 3 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index a9b8503945..f0aeaa9ecb 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -17,7 +17,6 @@
 from keras_nlp.src.api_export import keras_nlp_export
 from keras_nlp.src.models.backbone import Backbone
 
-# CHANNEL_AXIS = -1 if keras.config.image_data_format() == "channels_last" else 1
 BN_EPSILON = 1e-3
 BN_MOMENTUM = 0.999
 
@@ -47,8 +46,14 @@ class MobileNetBackbone(Backbone):
             each inverted residual block in the model.
         stackwise_filters: list of ints, number of filters for each inverted
             residual block in the model.
+        stackwise_kernel_size: list of ints, kernel size for each inverted
+            residual block in the model.
         stackwise_stride: list of ints, stride length for each inverted
             residual block in the model.
+        stackwise_se_ratio: se ratio for each inverted residual block in the
+            model. 0 if dont want to add Squeeze and Excite layer.
+        stackwise_activation: list of activation functions, for each inverted
+             residual block in the model.
         include_rescaling: bool, whether to rescale the inputs. If set to True,
             inputs will be passed through a `Rescaling(scale=1 / 255)`
             layer.
@@ -60,8 +65,8 @@ class MobileNetBackbone(Backbone):
                 of filters in each layer.
             - If `depth_multiplier` = 1, default number of filters from the paper
                 are used at each layer.
-        input_filter: number of filters in first convolution layer
-        output_filter: specifies whether to add conv and batch_norm in the end,
+        input_filters: number of filters in first convolution layer
+        output_filters: specifies whether to add conv and batch_norm in the end,
             if set to None, it will not add these layers in the end.
             'None' for MobileNetV1
         activation: activation function to be used
@@ -85,7 +90,7 @@ class MobileNetBackbone(Backbone):
         stackwise_se_ratio=[0.25, None, 0.25],
         stackwise_activation=["relu", "relu6", "hard_swish"],
         include_rescaling=False,
-        output_filter=1280,
+        output_filters=1280,
         activation=keras.activations.hard_swish,
         inverted_res_block=True,
 
@@ -103,16 +108,16 @@ def __init__(
         stackwise_se_ratio,
         stackwise_activation,
         include_rescaling,
-        output_filter,
+        output_filters,
         inverted_res_block,
         image_shape=(224, 224, 3),
-        activation=keras.activations.hard_swish,
+        activation="hard_swish",
         depth_multiplier=1.0,
         input_filters=16,
         **kwargs,
     ):
         # === Functional Model ===
-        CHANNEL_AXIS = (
+        channel_axis = (
             -1 if keras.config.image_data_format() == "channels_last" else 1
         )
 
@@ -133,24 +138,23 @@ def __init__(
             name="input_conv",
         )(x)
         x = keras.layers.BatchNormalization(
-            axis=CHANNEL_AXIS,
+            axis=channel_axis,
             epsilon=BN_EPSILON,
             momentum=BN_MOMENTUM,
             name="input_batch_norm",
         )(x)
-        x = keras.layers.Activation(
-            activation,
-        )(x)
+        x = keras.layers.Activation(activation)(x)
 
         for stack_index in range(len(stackwise_filters)):
+            filters = adjust_channels(
+                (stackwise_filters[stack_index]) * depth_multiplier
+            )
 
             if inverted_res_block:
                 x = apply_inverted_res_block(
                     x,
                     expansion=stackwise_expansion[stack_index],
-                    filters=adjust_channels(
-                        (stackwise_filters[stack_index]) * depth_multiplier
-                    ),
+                    filters=filters,
                     kernel_size=stackwise_kernel_size[stack_index],
                     stride=stackwise_stride[stack_index],
                     se_ratio=(stackwise_se_ratio[stack_index]),
@@ -160,17 +164,15 @@ def __init__(
             else:
                 x = apply_depthwise_conv_block(
                     x,
-                    filters=adjust_channels(
-                        (stackwise_filters[stack_index]) * depth_multiplier
-                    ),
+                    filters=filters,
                     kernel_size=3,
                     stride=stackwise_stride[stack_index],
                     depth_multiplier=depth_multiplier,
                     block_id=stack_index,
                 )
 
-        if output_filter is not None:
-            last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6)
+        if output_filters is not None:
+            last_conv_ch = adjust_channels(x.shape[channel_axis] * 6)
 
             x = keras.layers.Conv2D(
                 last_conv_ch,
@@ -181,7 +183,7 @@ def __init__(
                 name="output_conv",
             )(x)
             x = keras.layers.BatchNormalization(
-                axis=CHANNEL_AXIS,
+                axis=channel_axis,
                 epsilon=BN_EPSILON,
                 momentum=BN_MOMENTUM,
                 name="output_batch_norm",
@@ -200,7 +202,7 @@ def __init__(
         self.include_rescaling = include_rescaling
         self.depth_multiplier = depth_multiplier
         self.input_filters = input_filters
-        self.output_filter = output_filter
+        self.output_filters = output_filters
         self.activation = keras.activations.get(activation)
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape
@@ -219,7 +221,7 @@ def get_config(self):
                 "image_shape": self.image_shape,
                 "depth_multiplier": self.depth_multiplier,
                 "input_filters": self.input_filters,
-                "output_filter": self.output_filter,
+                "output_filters": self.output_filters,
                 "activation": keras.activations.serialize(
                     activation=self.activation
                 ),
@@ -283,13 +285,13 @@ def apply_inverted_res_block(
     Returns:
         the updated input tensor.
     """
-    CHANNEL_AXIS = (
+    channel_axis = (
         -1 if keras.config.image_data_format() == "channels_last" else 1
     )
     activation = keras.activations.get(activation)
     shortcut = x
     prefix = "expanded_conv_"
-    infilters = x.shape[CHANNEL_AXIS]
+    infilters = x.shape[channel_axis]
 
     if expansion_index > 0:
         prefix = f"expanded_conv_{expansion_index}_"
@@ -303,7 +305,7 @@ def apply_inverted_res_block(
             name=prefix + "expand",
         )(x)
         x = keras.layers.BatchNormalization(
-            axis=CHANNEL_AXIS,
+            axis=channel_axis,
             epsilon=BN_EPSILON,
             momentum=BN_MOMENTUM,
             name=prefix + "expand_BatchNorm",
@@ -325,7 +327,7 @@ def apply_inverted_res_block(
         name=prefix + "depthwise",
     )(x)
     x = keras.layers.BatchNormalization(
-        axis=CHANNEL_AXIS,
+        axis=channel_axis,
         epsilon=BN_EPSILON,
         momentum=BN_MOMENTUM,
         name=prefix + "depthwise_BatchNorm",
@@ -351,7 +353,7 @@ def apply_inverted_res_block(
         name=prefix + "project",
     )(x)
     x = keras.layers.BatchNormalization(
-        axis=CHANNEL_AXIS,
+        axis=channel_axis,
         epsilon=BN_EPSILON,
         momentum=BN_MOMENTUM,
         name=prefix + "project_BatchNorm",
@@ -381,12 +383,13 @@ def apply_depthwise_conv_block(
         x: Input tensor of shape `(rows, cols, channels)
         filters: Integer, the dimensionality of the output space
             (i.e. the number of output filters in the pointwise convolution).
-        depth_multiplier: controls the width of the network. - If `depth_multiplier` < 1.0,
-            proportionally decreases the number of filters in each layer.
-            - If `depth_multiplier` > 1.0, proportionally increases the number of filters
-                in each layer.
-            - If `depth_multiplier` = 1, default number of filters from the paper are
-                used at each layer.
+        depth_multiplier: controls the width of the network.
+            - If `depth_multiplier` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `depth_multiplier` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `depth_multiplier` = 1, default number of filters from the
+                paper are used at each layer.
         strides: An integer or tuple/list of 2 integers, specifying the strides
             of the convolution along the width and height.
             Can be a single integer to specify the same value for
@@ -395,11 +398,12 @@ def apply_depthwise_conv_block(
         block_id: Integer, a unique identification designating the block number.
 
     Input shape:
-        4D tensor with shape: `(batch, rows, cols, channels)`
+        4D tensor: `(batch, rows, cols, channels)` for "channels_last", or
+        `(batch, channels, rows, cols)` for "channels_first".
     Returns:
         Output tensor of block.
     """
-    CHANNEL_AXIS = (
+    channel_axis = (
         -1 if keras.config.image_data_format() == "channels_last" else 1
     )
     if stride == 2:
@@ -418,7 +422,7 @@ def apply_depthwise_conv_block(
         name="depthwise_%d" % block_id,
     )(x)
     x = keras.layers.BatchNormalization(
-        axis=CHANNEL_AXIS,
+        axis=channel_axis,
         epsilon=BN_EPSILON,
         momentum=BN_MOMENTUM,
         name="depthwise_BatchNorm_%d" % block_id,
@@ -434,7 +438,7 @@ def apply_depthwise_conv_block(
         name="conv_%d" % block_id,
     )(x)
     x = keras.layers.BatchNormalization(
-        axis=CHANNEL_AXIS,
+        axis=channel_axis,
         epsilon=BN_EPSILON,
         momentum=BN_MOMENTUM,
         name="BatchNorm_%d" % block_id,
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index 542c1cc785..f8fdd2079e 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -30,7 +30,7 @@ def setUp(self):
             "stackwise_se_ratio": [0.25, None, 0.25],
             "stackwise_activation": ["relu", "relu", "hard_swish"],
             "include_rescaling": False,
-            "output_filter": 1280,
+            "output_filters": 1280,
             "activation": keras.activations.hard_swish,
             "inverted_res_block": True,
             "input_filters": 16,
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index 0a561056e4..46ce9c9220 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -35,8 +35,8 @@ def setUp(self):
             stackwise_se_ratio=[0.25, None, 0.25],
             stackwise_activation=["relu", "relu", "hard_swish"],
             include_rescaling=False,
-            output_filter=1280,
-            activation=keras.activations.hard_swish,
+            output_filters=1280,
+            activation="hard_swish",
             inverted_res_block=True,
             input_filters=16,
             image_shape=(224, 224, 3),

From b4bf090fb037d6e548295dfc51071f6c7f9cad05 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Mon, 26 Aug 2024 00:59:23 +0530
Subject: [PATCH 27/28] formatting fixed

---
 .../src/models/mobilenet/mobilenet_image_classifier_test.py      | 1 -
 1 file changed, 1 deletion(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index 46ce9c9220..ab393af3ec 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import keras
 import numpy as np
 import pytest
 

From 1804ffeff6f38d1b2ba1c2071333cfc39c2ce6b0 Mon Sep 17 00:00:00 2001
From: ushareng <usha.rengaraju@gmail.com>
Date: Tue, 27 Aug 2024 19:04:29 +0530
Subject: [PATCH 28/28] refactoring

---
 .../models/mobilenet/mobilenet_backbone.py    | 79 +++++++++++--------
 .../mobilenet/mobilenet_backbone_test.py      | 12 +--
 .../mobilenet_image_classifier_test.py        | 11 +--
 3 files changed, 56 insertions(+), 46 deletions(-)

diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
index f0aeaa9ecb..4054b6d76f 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py
@@ -28,7 +28,7 @@ class MobileNetBackbone(Backbone):
     MobileNet is a lightweight convolutional neural network (CNN)
     optimized for mobile and edge devices, striking a balance between
     accuracy and efficiency. By employing depthwise separable convolutions
-    and techniques like Squeeze-and-Excitation (SE) blocks in later versions,
+    and techniques like Squeeze-and-Excitation (SE) blocks,
     MobileNet models are highly suitable for real-time applications on
     resource-constrained devices.
 
@@ -44,11 +44,11 @@ class MobileNetBackbone(Backbone):
     Args:
         stackwise_expansion: list of ints or floats, the expansion ratio for
             each inverted residual block in the model.
-        stackwise_filters: list of ints, number of filters for each inverted
+        stackwise_num_filters: list of ints, number of filters for each inverted
             residual block in the model.
         stackwise_kernel_size: list of ints, kernel size for each inverted
             residual block in the model.
-        stackwise_stride: list of ints, stride length for each inverted
+        stackwise_num_strides: list of ints, stride length for each inverted
             residual block in the model.
         stackwise_se_ratio: se ratio for each inverted residual block in the
             model. 0 if dont want to add Squeeze and Excite layer.
@@ -65,11 +65,14 @@ class MobileNetBackbone(Backbone):
                 of filters in each layer.
             - If `depth_multiplier` = 1, default number of filters from the paper
                 are used at each layer.
-        input_filters: number of filters in first convolution layer
-        output_filters: specifies whether to add conv and batch_norm in the end,
+        input_num_filters: number of filters in first convolution layer
+        output_num_filters: whether to add conv and batch_norm at the end,
             if set to None, it will not add these layers in the end.
             'None' for MobileNetV1
-        activation: activation function to be used
+        input_activation: activation function to be used in the input layer
+            'hard_swish' for MobileNetV3,
+            'relu6' for MobileNetV1 and MobileNetV2
+        output_activation: activation function to be used in the output layer
             'hard_swish' for MobileNetV3,
             'relu6' for MobileNetV1 and MobileNetV2
         inverted_res_block: whether to use inverted residual blocks or not,
@@ -84,14 +87,15 @@ class MobileNetBackbone(Backbone):
     # Randomly initialized backbone with a custom config
     model = MobileNetBackbone(
         stackwise_expansion=[1, 4, 6],
-        stackwise_filters=[4, 8, 16],
+        stackwise_num_filters=[4, 8, 16],
         stackwise_kernel_size=[3, 3, 5],
-        stackwise_stride=[2, 2, 1],
+        stackwise_num_strides=[2, 2, 1],
         stackwise_se_ratio=[0.25, None, 0.25],
         stackwise_activation=["relu", "relu6", "hard_swish"],
         include_rescaling=False,
-        output_filters=1280,
-        activation=keras.activations.hard_swish,
+        output_num_filters=1280,
+        input_activation='hard_swish',
+        output_activation='hard_swish',
         inverted_res_block=True,
 
     )
@@ -102,18 +106,19 @@ class MobileNetBackbone(Backbone):
     def __init__(
         self,
         stackwise_expansion,
-        stackwise_filters,
+        stackwise_num_filters,
         stackwise_kernel_size,
-        stackwise_stride,
+        stackwise_num_strides,
         stackwise_se_ratio,
         stackwise_activation,
         include_rescaling,
-        output_filters,
+        output_num_filters,
         inverted_res_block,
         image_shape=(224, 224, 3),
-        activation="hard_swish",
+        input_activation="hard_swish",
+        output_activation="hard_swish",
         depth_multiplier=1.0,
-        input_filters=16,
+        input_num_filters=16,
         **kwargs,
     ):
         # === Functional Model ===
@@ -127,9 +132,9 @@ def __init__(
         if include_rescaling:
             x = keras.layers.Rescaling(scale=1 / 255)(x)
 
-        input_filters = adjust_channels(input_filters)
+        input_num_filters = adjust_channels(input_num_filters)
         x = keras.layers.Conv2D(
-            input_filters,
+            input_num_filters,
             kernel_size=3,
             strides=(2, 2),
             padding="same",
@@ -143,11 +148,11 @@ def __init__(
             momentum=BN_MOMENTUM,
             name="input_batch_norm",
         )(x)
-        x = keras.layers.Activation(activation)(x)
+        x = keras.layers.Activation(input_activation)(x)
 
-        for stack_index in range(len(stackwise_filters)):
+        for stack_index in range(len(stackwise_num_filters)):
             filters = adjust_channels(
-                (stackwise_filters[stack_index]) * depth_multiplier
+                (stackwise_num_filters[stack_index]) * depth_multiplier
             )
 
             if inverted_res_block:
@@ -156,7 +161,7 @@ def __init__(
                     expansion=stackwise_expansion[stack_index],
                     filters=filters,
                     kernel_size=stackwise_kernel_size[stack_index],
-                    stride=stackwise_stride[stack_index],
+                    stride=stackwise_num_strides[stack_index],
                     se_ratio=(stackwise_se_ratio[stack_index]),
                     activation=stackwise_activation[stack_index],
                     expansion_index=stack_index,
@@ -166,12 +171,12 @@ def __init__(
                     x,
                     filters=filters,
                     kernel_size=3,
-                    stride=stackwise_stride[stack_index],
+                    stride=stackwise_num_strides[stack_index],
                     depth_multiplier=depth_multiplier,
                     block_id=stack_index,
                 )
 
-        if output_filters is not None:
+        if output_num_filters is not None:
             last_conv_ch = adjust_channels(x.shape[channel_axis] * 6)
 
             x = keras.layers.Conv2D(
@@ -188,22 +193,23 @@ def __init__(
                 momentum=BN_MOMENTUM,
                 name="output_batch_norm",
             )(x)
-            x = keras.layers.Activation(activation)(x)
+            x = keras.layers.Activation(output_activation)(x)
 
         super().__init__(inputs=inputs, outputs=x, **kwargs)
 
         # === Config ===
         self.stackwise_expansion = stackwise_expansion
-        self.stackwise_filters = stackwise_filters
+        self.stackwise_num_filters = stackwise_num_filters
         self.stackwise_kernel_size = stackwise_kernel_size
-        self.stackwise_stride = stackwise_stride
+        self.stackwise_num_strides = stackwise_num_strides
         self.stackwise_se_ratio = stackwise_se_ratio
         self.stackwise_activation = stackwise_activation
         self.include_rescaling = include_rescaling
         self.depth_multiplier = depth_multiplier
-        self.input_filters = input_filters
-        self.output_filters = output_filters
-        self.activation = keras.activations.get(activation)
+        self.input_num_filters = input_num_filters
+        self.output_num_filters = output_num_filters
+        self.input_activation = keras.activations.get(input_activation)
+        self.output_activation = keras.activations.get(output_activation)
         self.inverted_res_block = inverted_res_block
         self.image_shape = image_shape
 
@@ -212,18 +218,21 @@ def get_config(self):
         config.update(
             {
                 "stackwise_expansion": self.stackwise_expansion,
-                "stackwise_filters": self.stackwise_filters,
+                "stackwise_num_filters": self.stackwise_num_filters,
                 "stackwise_kernel_size": self.stackwise_kernel_size,
-                "stackwise_stride": self.stackwise_stride,
+                "stackwise_num_strides": self.stackwise_num_strides,
                 "stackwise_se_ratio": self.stackwise_se_ratio,
                 "stackwise_activation": self.stackwise_activation,
                 "include_rescaling": self.include_rescaling,
                 "image_shape": self.image_shape,
                 "depth_multiplier": self.depth_multiplier,
-                "input_filters": self.input_filters,
-                "output_filters": self.output_filters,
-                "activation": keras.activations.serialize(
-                    activation=self.activation
+                "input_num_filters": self.input_num_filters,
+                "output_num_filters": self.output_num_filters,
+                "input_activation": keras.activations.serialize(
+                    activation=self.input_activation
+                ),
+                "output_activation": keras.activations.serialize(
+                    activation=self.output_activation
                 ),
                 "inverted_res_block": self.inverted_res_block,
             }
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
index f8fdd2079e..80225abe04 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import keras
 import numpy as np
 import pytest
 
@@ -24,16 +23,17 @@ class MobileNetBackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
             "stackwise_expansion": [1, 4, 6],
-            "stackwise_filters": [4, 8, 16],
+            "stackwise_num_filters": [4, 8, 16],
             "stackwise_kernel_size": [3, 3, 5],
-            "stackwise_stride": [2, 2, 1],
+            "stackwise_num_strides": [2, 2, 1],
             "stackwise_se_ratio": [0.25, None, 0.25],
             "stackwise_activation": ["relu", "relu", "hard_swish"],
             "include_rescaling": False,
-            "output_filters": 1280,
-            "activation": keras.activations.hard_swish,
+            "output_num_filters": 1280,
+            "input_activation": "hard_swish",
+            "output_activation": "hard_swish",
             "inverted_res_block": True,
-            "input_filters": 16,
+            "input_num_filters": 16,
             "image_shape": (224, 224, 3),
             "depth_multiplier": 1,
         }
diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
index ab393af3ec..29d00e6d24 100644
--- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -28,16 +28,17 @@ def setUp(self):
         self.labels = [0, 3]
         self.backbone = MobileNetBackbone(
             stackwise_expansion=[1, 4, 6],
-            stackwise_filters=[4, 8, 16],
+            stackwise_num_filters=[4, 8, 16],
             stackwise_kernel_size=[3, 3, 5],
-            stackwise_stride=[2, 2, 1],
+            stackwise_num_strides=[2, 2, 1],
             stackwise_se_ratio=[0.25, None, 0.25],
             stackwise_activation=["relu", "relu", "hard_swish"],
             include_rescaling=False,
-            output_filters=1280,
-            activation="hard_swish",
+            output_num_filters=1280,
+            input_activation="hard_swish",
+            output_activation="hard_swish",
             inverted_res_block=True,
-            input_filters=16,
+            input_num_filters=16,
             image_shape=(224, 224, 3),
         )
         self.init_kwargs = {