From 737eecbc69ed96a653651c421daa2818b1fc0ca9 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 12:25:00 +0530 Subject: [PATCH 01/28] mobilenet_v3 added in keras-nlp --- keras_nlp/api/models/__init__.py | 4 + keras_nlp/src/models/mobilenet_v3/__init__.py | 13 + .../mobilenet_v3/mobilenet_v3_backbone.py | 357 ++++++++++++++++++ .../mobilenet_v3_backbone_test.py | 52 +++ .../mobilenet_v3_image_classifier.py | 111 ++++++ .../mobilenet_v3_image_classifier_test.py | 67 ++++ 6 files changed, 604 insertions(+) create mode 100644 keras_nlp/src/models/mobilenet_v3/__init__.py create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py create mode 100644 keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index 6f7e08c520..5b957ee548 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -165,6 +165,10 @@ MistralPreprocessor, ) from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( + MobileNetV3ImageClassifier, +) from keras_nlp.src.models.opt.opt_backbone import OPTBackbone from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import ( diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet_v3/__init__.py new file mode 100644 index 0000000000..2351a1b7b4 --- /dev/null +++ b/keras_nlp/src/models/mobilenet_v3/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py new file mode 100644 index 0000000000..f85e6efa67 --- /dev/null +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -0,0 +1,357 @@ +import keras +from keras import ops +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.backbone import Backbone + + +CHANNEL_AXIS = -1 +BN_EPSILON = 1e-3 +BN_MOMENTUM = 0.999 + + +@keras_cv_export("keras_nlp.models.MobileNetV3Backbone") +class MobileNetV3Backbone(Backbone): + """Instantiates the MobileNetV3 architecture. + + References: + - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) + (ICCV 2019) + - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py) + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + stackwise_expansion: list of ints or floats, the expansion ratio for + each inverted residual block in the model. + stackwise_filters: list of ints, number of filters for each inverted + residual block in the model. + stackwise_stride: list of ints, stride length for each inverted + residual block in the model. + include_rescaling: bool, whether to rescale the inputs. If set to True, + inputs will be passed through a `Rescaling(scale=1 / 255)` + layer. + input_shape: optional shape tuple, defaults to (None, None, 3). + alpha: float, controls the width of the network. This is known as the + depth multiplier in the MobileNetV3 paper, but the name is kept for + consistency with MobileNetV1 in Keras. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1, default number of filters from the paper + are used at each layer. + + Example: + ```python + input_data = tf.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone with a custom config + model = MobileNetV3Backbone( + stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], + stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], + stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], + stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + include_rescaling=False, + ) + output = model(input_data) + ``` + """ # noqa: E501 + + def __init__( + self, + *, + stackwise_expansion, + stackwise_filters, + stackwise_kernel_size, + stackwise_stride, + stackwise_se_ratio, + stackwise_activation, + include_rescaling, + input_shape=(224, 224, 3), + alpha=1.0, + **kwargs, + ): + inputs = keras.layers.Input(shape=input_image_shape) + x = inputs + + if include_rescaling: + x = keras.layers.Rescaling(scale=1 / 255)(x) + + x = keras.layers.Conv2D( + 16, + kernel_size=3, + strides=(2, 2), + padding="same", + use_bias=False, + name="Conv", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name="Conv_BatchNorm", + )(x) + x = apply_hard_swish(x) + + for stack_index in range(len(stackwise_filters)): + + x = apply_inverted_res_block( + x, + expansion=stackwise_expansion[stack_index], + filters=adjust_channels( + (stackwise_filters[stack_index]) * alpha + ), + kernel_size=stackwise_kernel_size[stack_index], + stride=stackwise_stride[stack_index], + se_ratio=stackwise_se_ratio[stack_index], + activation=stackwise_activation[stack_index], + expansion_index=stack_index, + ) + + last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) + + x = keras.layers.Conv2D( + last_conv_ch, + kernel_size=1, + padding="same", + use_bias=False, + name="Conv_1", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name="Conv_1_BatchNorm", + )(x) + x = apply_hard_swish(x) + + super().__init__(inputs=inputs, outputs=x, **kwargs) + + self.stackwise_expansion = stackwise_expansion + self.stackwise_filters = stackwise_filters + self.stackwise_kernel_size = stackwise_kernel_size + self.stackwise_stride = stackwise_stride + self.stackwise_se_ratio = stackwise_se_ratio + self.stackwise_activation = stackwise_activation + self.include_rescaling = include_rescaling + self.alpha = alpha + + def get_config(self): + config = super().get_config() + config.update( + { + "stackwise_expansion": self.stackwise_expansion, + "stackwise_filters": self.stackwise_filters, + "stackwise_kernel_size": self.stackwise_kernel_size, + "stackwise_stride": self.stackwise_stride, + "stackwise_se_ratio": self.stackwise_se_ratio, + "stackwise_activation": self.stackwise_activation, + "include_rescaling": self.include_rescaling, + "input_shape": self.input_shape[1:], + "alpha": self.alpha, + } + ) + return config + + +class HardSigmoidActivation(keras.layers.Layer): + def __init__(self): + super().__init__() + + def call(self, x): + return apply_hard_sigmoid(x) + + def get_config(self): + return super().get_config() + + +def adjust_channels(x, divisor=8, min_value=None): + """Ensure that all layers have a channel number divisible by the `divisor`. + + Args: + x: integer, input value. + divisor: integer, the value by which a channel number should be + divisible, defaults to 8. + min_value: float, optional minimum value for the new tensor. If None, + defaults to value of divisor. + + Returns: + the updated input scalar. + """ + + if min_value is None: + min_value = divisor + + new_x = max(min_value, int(x + divisor / 2) // divisor * divisor) + + # make sure that round down does not go down by more than 10%. + if new_x < 0.9 * x: + new_x += divisor + return new_x + + +def apply_hard_sigmoid(x): + activation = keras.layers.ReLU(6.0) + return activation(x + 3.0) * (1.0 / 6.0) + + +def apply_hard_swish(x): + return keras.layers.Multiply()([x, apply_hard_sigmoid(x)]) + + +def apply_inverted_res_block( + x, + expansion, + filters, + kernel_size, + stride, + se_ratio, + activation, + expansion_index, +): + """An Inverted Residual Block. + + Args: + x: input tensor. + expansion: integer, the expansion ratio, multiplied with infilters to + get the minimum value passed to adjust_channels. + filters: integer, number of filters for convolution layer. + kernel_size: integer, the kernel size for DepthWise Convolutions. + stride: integer, the stride length for DepthWise Convolutions. + se_ratio: float, ratio for bottleneck filters. Number of bottleneck + filters = filters * se_ratio. + activation: the activation layer to use. + expansion_index: integer, a unique identification if you want to use + expanded convolutions. If greater than 0, an additional Conv+BN + layer is added after the expanded convolutional layer. + + Returns: + the updated input tensor. + """ + if isinstance(activation, str): + if activation == "hard_swish": + activation = apply_hard_swish + else: + activation = keras.activations.get(activation) + + shortcut = x + prefix = "expanded_conv_" + infilters = x.shape[CHANNEL_AXIS] + + if expansion_index > 0: + prefix = f"expanded_conv_{expansion_index}_" + + x = keras.layers.Conv2D( + adjust_channels(infilters * expansion), + kernel_size=1, + padding="same", + use_bias=False, + name=prefix + "expand", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=prefix + "expand_BatchNorm", + )(x) + x = activation(x) + + if stride == 2: + x = keras.layers.ZeroPadding2D( + padding=utils.correct_pad_downsample(x, kernel_size), + name=prefix + "depthwise_pad", + )(x) + + x = keras.layers.DepthwiseConv2D( + kernel_size, + strides=stride, + padding="same" if stride == 1 else "valid", + use_bias=False, + name=prefix + "depthwise", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=prefix + "depthwise_BatchNorm", + )(x) + x = activation(x) + + if se_ratio: + se_filters = adjust_channels(infilters * expansion) + x = SqueezeAndExcite2D( + x, + se_filters, + adjust_channels(se_filters * se_ratio), + "relu", + HardSigmoidActivation(), + ) + + x = keras.layers.Conv2D( + filters, + kernel_size=1, + padding="same", + use_bias=False, + name=prefix + "project", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=prefix + "project_BatchNorm", + )(x) + + if stride == 1 and infilters == filters: + x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) + + return x + +def SqueezeAndExcite2D( + input, + filters, + bottleneck_filters=None, + squeeze_activation="relu", + excite_activation="sigmoid", +): + """ + Args: + filters: Number of input and output filters. The number of input and + output filters is same. + bottleneck_filters: (Optional) Number of bottleneck filters. Defaults + to `0.25 * filters` + squeeze_activation: (Optional) String, callable (or + keras.layers.Layer) or keras.activations.Activation instance + denoting activation to be applied after squeeze convolution. + Defaults to `relu`. + excite_activation: (Optional) String, callable (or + keras.layers.Layer) or keras.activations.Activation instance + denoting activation to be applied after excite convolution. + Defaults to `sigmoid`. + Example: + + ```python + # (...) + input = tf.ones((1, 5, 5, 16), dtype=tf.float32) + x = keras.layers.Conv2D(16, (3, 3))(input) + + # (...) + ``` + """ + if not bottleneck_filters: + bottleneck_filters = (filters // 4) + + x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input) + x = keras.layers.Conv2D( + bottleneck_filters, + (1, 1), + activation=self.squeeze_activation, + )(x) + x = keras.layers.Conv2D( + self.filters, (1, 1), activation=self.excite_activation + )(x) + + x = ops.multiply(x, input) + return x \ No newline at end of file diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py new file mode 100644 index 0000000000..d106891ddf --- /dev/null +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -0,0 +1,52 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.tests.test_case import TestCase + + +class MobileNetV3BackboneTest(TestCase): + def setUp(self): + self.init_kwargs = { + "stackwise_expansion": [1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], + "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], + "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + "stackwise_se_ratio": [0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], + "stackwise_activation": ["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + "include_rescaling": False, + "input_image_shape": (224, 224, 3), + "alpha": 1 + } + self.input_data = np.ones((2, 224, 224, 3), dtype="float32") + + def test_backbone_basics(self): + self.run_backbone_test( + cls=MobileNetV3Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + expected_output_shape=(2, 7, 7, 1024), + run_mixed_precision_check=False, + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=MobileNetV3Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) \ No newline at end of file diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py new file mode 100644 index 0000000000..83fafe616e --- /dev/null +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py @@ -0,0 +1,111 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import keras + +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.image_classifier import ImageClassifier + + +@keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier") +class MobileNetV3ImageClassifier(ImageClassifier): + """MobileNetV3 image classifier task model. + + To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` + where `x` is a tensor and `y` is a integer from `[0, num_classes)`. + All `ImageClassifier` tasks include a `from_preset()` constructor which can + be used to load a pre-trained config and weights. + + Args: + backbone: A `keras_nlp.models.MobileNetV3Backbone` instance. + num_classes: int. The number of classes to predict. + activation: `None`, str or callable. The activation function to use on + the `Dense` layer. Set `activation=None` to return the output + logits. Defaults to `"softmax"`. + + Examples: + + Call `predict()` to run inference. + ```python + # Load preset and train + images = np.ones((2, 224, 224, 3), dtype="float32") + classifier = keras_nlp.models.MobileNetV3ImageClassifier.from_preset( + "mobilenet_v3_small_imagenet") + classifier.predict(images) + ``` +\ + Custom backbone. + ```python + images = np.ones((2, 224, 224, 3), dtype="float32") + labels = [0, 3] + model = MobileNetV3Backbone( + stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], + stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], + stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], + stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + include_rescaling=False, + ) + classifier = keras_nlp.models.MobileNetV3ImageClassifier( + backbone=backbone, + num_classes=4, + ) + classifier.fit(x=images, y=labels, batch_size=2) + ``` + """ + + backbone_cls = MobileNetV3Backbone + + def __init__( + self, + backbone, + num_classes, + activation="softmax", + preprocessor=None, # adding this dummy arg for saved model test + # TODO: once preprocessor flow is figured out, this needs to be updated + **kwargs, + ): + # === Layers === + self.backbone = backbone + self.output_dense = keras.layers.Dense( + num_classes, + activation=activation, + name="predictions", + ) + + # === Functional Model === + inputs = self.backbone.input + x = self.backbone(inputs) + outputs = self.output_dense(x) + super().__init__( + inputs=inputs, + outputs=outputs, + **kwargs, + ) + + # === Config === + self.num_classes = num_classes + self.activation = activation + + def get_config(self): + # Backbone serialized in `super` + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "activation": self.activation, + } + ) + return config diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py new file mode 100644 index 0000000000..27752775b6 --- /dev/null +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py @@ -0,0 +1,67 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest + +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( + MobileNetV3ImageClassifier, +) +from keras_nlp.src.tests.test_case import TestCase + + +class MobileNetV3ImageClassifierTest(TestCase): + def setUp(self): + # Setup model. + self.images = np.ones((2, 224, 224, 3), dtype="float32") + self.labels = [0, 3] + self.backbone = MobileNetV3Backbone( + + stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], + stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], + stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], + stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + include_rescaling=False, + input_shape=(224, 224, 3), + ) + self.init_kwargs = { + "backbone": self.backbone, + "num_classes": 2, + "activation": "softmax", + } + self.train_data = ( + self.images, + self.labels, + ) + + def test_classifier_basics(self): + pytest.skip( + reason="TODO: enable after preprocessor flow is figured out" + ) + self.run_task_test( + cls=MobileNetV3ImageClassifier, + init_kwargs=self.init_kwargs, + train_data=self.train_data, + expected_output_shape=(2, 2), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=MobileNetV3ImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) \ No newline at end of file From 65cc1f8973c2b364fbd23dfff5fd5677a03ff603 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 12:36:59 +0530 Subject: [PATCH 02/28] minor bug fixed in mobilenet_v3_backbone --- keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index f85e6efa67..6726733493 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -9,7 +9,7 @@ BN_MOMENTUM = 0.999 -@keras_cv_export("keras_nlp.models.MobileNetV3Backbone") +@keras_nlp_export("keras_nlp.models.MobileNetV3Backbone") class MobileNetV3Backbone(Backbone): """Instantiates the MobileNetV3 architecture. From d66cb9a916d3b5923aa39612e3f7e4dcdb903e35 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 13:52:59 +0530 Subject: [PATCH 03/28] formatting corrected --- keras_nlp/api/models/__init__.py | 4 +- .../mobilenet_v3/mobilenet_v3_backbone.py | 19 +++---- .../mobilenet_v3_backbone_test.py | 50 ++++++++++++++++--- .../mobilenet_v3_image_classifier.py | 4 +- .../mobilenet_v3_image_classifier_test.py | 49 +++++++++++++++--- 5 files changed, 103 insertions(+), 23 deletions(-) diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index 5b957ee548..f8dd88a688 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -165,7 +165,9 @@ MistralPreprocessor, ) from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( + MobileNetV3Backbone, +) from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( MobileNetV3ImageClassifier, ) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index 6726733493..e170bdbb22 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -97,7 +97,7 @@ def __init__( x = apply_hard_swish(x) for stack_index in range(len(stackwise_filters)): - + x = apply_inverted_res_block( x, expansion=stackwise_expansion[stack_index], @@ -110,7 +110,7 @@ def __init__( activation=stackwise_activation[stack_index], expansion_index=stack_index, ) - + last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) x = keras.layers.Conv2D( @@ -309,6 +309,7 @@ def apply_inverted_res_block( return x + def SqueezeAndExcite2D( input, filters, @@ -336,22 +337,22 @@ def SqueezeAndExcite2D( # (...) input = tf.ones((1, 5, 5, 16), dtype=tf.float32) x = keras.layers.Conv2D(16, (3, 3))(input) - + # (...) ``` """ if not bottleneck_filters: - bottleneck_filters = (filters // 4) + bottleneck_filters = filters // 4 x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input) x = keras.layers.Conv2D( - bottleneck_filters, - (1, 1), - activation=self.squeeze_activation, + bottleneck_filters, + (1, 1), + activation=self.squeeze_activation, )(x) x = keras.layers.Conv2D( - self.filters, (1, 1), activation=self.excite_activation + self.filters, (1, 1), activation=self.excite_activation )(x) x = ops.multiply(x, input) - return x \ No newline at end of file + return x diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py index d106891ddf..e34790424a 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -15,22 +15,60 @@ import numpy as np import pytest -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( + MobileNetV3Backbone, +) from keras_nlp.src.tests.test_case import TestCase class MobileNetV3BackboneTest(TestCase): def setUp(self): self.init_kwargs = { - "stackwise_expansion": [1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + "stackwise_expansion": [ + 1, + 72.0 / 16, + 88.0 / 24, + 4, + 6, + 6, + 3, + 3, + 6, + 6, + 6, + ], "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], - "stackwise_se_ratio": [0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], - "stackwise_activation": ["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + "stackwise_se_ratio": [ + 0.25, + None, + None, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + "stackwise_activation": [ + "relu", + "relu", + "relu", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + ], "include_rescaling": False, "input_image_shape": (224, 224, 3), - "alpha": 1 + "alpha": 1, } self.input_data = np.ones((2, 224, 224, 3), dtype="float32") @@ -49,4 +87,4 @@ def test_saved_model(self): cls=MobileNetV3Backbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - ) \ No newline at end of file + ) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py index 83fafe616e..0d2b91d740 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py @@ -14,8 +14,10 @@ import keras from keras_nlp.src.api_export import keras_nlp_export -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone from keras_nlp.src.models.image_classifier import ImageClassifier +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( + MobileNetV3Backbone, +) @keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier") diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py index 27752775b6..f98abf6fb2 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py @@ -14,7 +14,9 @@ import numpy as np import pytest -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import MobileNetV3Backbone +from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( + MobileNetV3Backbone, +) from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( MobileNetV3ImageClassifier, ) @@ -27,13 +29,48 @@ def setUp(self): self.images = np.ones((2, 224, 224, 3), dtype="float32") self.labels = [0, 3] self.backbone = MobileNetV3Backbone( - - stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], + stackwise_expansion=[ + 1, + 72.0 / 16, + 88.0 / 24, + 4, + 6, + 6, + 3, + 3, + 6, + 6, + 6, + ], stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], - stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], - stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + stackwise_se_ratio=[ + 0.25, + None, + None, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + ], + stackwise_activation=[ + "relu", + "relu", + "relu", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + "hard_swish", + ], include_rescaling=False, input_shape=(224, 224, 3), ) @@ -64,4 +101,4 @@ def test_saved_model(self): cls=MobileNetV3ImageClassifier, init_kwargs=self.init_kwargs, input_data=self.images, - ) \ No newline at end of file + ) From 8821e0cb6f20f7615d7abd294e620e4db0acfafe Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 13:57:29 +0530 Subject: [PATCH 04/28] refactoring backbone --- keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index e170bdbb22..a60afe5a18 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -74,7 +74,7 @@ def __init__( alpha=1.0, **kwargs, ): - inputs = keras.layers.Input(shape=input_image_shape) + inputs = keras.layers.Input(shape=input_shape) x = inputs if include_rescaling: From 189f268590d46efddc924ed16f516049481b0bf6 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 14:29:29 +0530 Subject: [PATCH 05/28] correct_pad_downsample method added --- .../mobilenet_v3/mobilenet_v3_backbone.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index a60afe5a18..7ff2b7fb69 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -261,7 +261,7 @@ def apply_inverted_res_block( if stride == 2: x = keras.layers.ZeroPadding2D( - padding=utils.correct_pad_downsample(x, kernel_size), + padding=correct_pad_downsample(x, kernel_size), name=prefix + "depthwise_pad", )(x) @@ -356,3 +356,28 @@ def SqueezeAndExcite2D( x = ops.multiply(x, input) return x + + +def correct_pad_downsample(inputs, kernel_size): + """Returns a tuple for zero-padding for 2D convolution with downsampling. + + Args: + inputs: Input tensor. + kernel_size: An integer or tuple/list of 2 integers. + + Returns: + A tuple. + """ + img_dim = 1 + input_size = inputs.shape[img_dim : (img_dim + 2)] + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if input_size[0] is None: + adjust = (1, 1) + else: + adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) + correct = (kernel_size[0] // 2, kernel_size[1] // 2) + return ( + (correct[0] - adjust[0], correct[0]), + (correct[1] - adjust[1], correct[1]), + ) From 709beaf88ca98667f20440d8483cb6a8753a0006 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 14:48:42 +0530 Subject: [PATCH 06/28] refactoring backbone --- keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index 7ff2b7fb69..e4ec7d41dd 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -348,10 +348,10 @@ def SqueezeAndExcite2D( x = keras.layers.Conv2D( bottleneck_filters, (1, 1), - activation=self.squeeze_activation, + activation=squeeze_activation, )(x) x = keras.layers.Conv2D( - self.filters, (1, 1), activation=self.excite_activation + filters, (1, 1), activation=excite_activation )(x) x = ops.multiply(x, input) From 4f04438806882059f06aaaa5a18c4a6cee23f935 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 15:11:05 +0530 Subject: [PATCH 07/28] parameters updated --- keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py | 5 +---- .../src/models/mobilenet_v3/mobilenet_v3_backbone_test.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index e4ec7d41dd..1f0f0d4340 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -3,7 +3,6 @@ from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.backbone import Backbone - CHANNEL_AXIS = -1 BN_EPSILON = 1e-3 BN_MOMENTUM = 0.999 @@ -350,9 +349,7 @@ def SqueezeAndExcite2D( (1, 1), activation=squeeze_activation, )(x) - x = keras.layers.Conv2D( - filters, (1, 1), activation=excite_activation - )(x) + x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x) x = ops.multiply(x, input) return x diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py index e34790424a..ecc3af6a62 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -67,7 +67,7 @@ def setUp(self): "hard_swish", ], "include_rescaling": False, - "input_image_shape": (224, 224, 3), + "input_shape": (224, 224, 3), "alpha": 1, } self.input_data = np.ones((2, 224, 224, 3), dtype="float32") From 9f6af774186168977a79a1e739e1f2a5705e03bb Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 15:44:56 +0530 Subject: [PATCH 08/28] Testcaseupdated, expected output shape corrected --- keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py index ecc3af6a62..0343db5b42 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -77,7 +77,7 @@ def test_backbone_basics(self): cls=MobileNetV3Backbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - expected_output_shape=(2, 7, 7, 1024), + expected_output_shape=(2, 7, 7, 576), run_mixed_precision_check=False, ) From d590dfadb79bd336420a8a9968ebbf26d606cf1b Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 19 Aug 2024 16:30:33 +0530 Subject: [PATCH 09/28] code formatted with black --- keras_nlp/src/models/mobilenet_v3/__init__.py | 2 +- .../models/mobilenet_v3/mobilenet_v3_backbone.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet_v3/__init__.py index 2351a1b7b4..3364a6bd16 100644 --- a/keras_nlp/src/models/mobilenet_v3/__init__.py +++ b/keras_nlp/src/models/mobilenet_v3/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index 1f0f0d4340..ae76abbd24 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -1,5 +1,19 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import keras from keras import ops + from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.backbone import Backbone From b26c318fb3781748a9300a459766b32673266c11 Mon Sep 17 00:00:00 2001 From: ushareng Date: Tue, 20 Aug 2024 12:02:46 +0530 Subject: [PATCH 10/28] testcase updated --- .../mobilenet_v3_backbone_test.py | 30 ++----------------- .../mobilenet_v3_image_classifier.py | 2 +- .../mobilenet_v3_image_classifier_test.py | 30 ++----------------- 3 files changed, 7 insertions(+), 55 deletions(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py index 0343db5b42..cbef4f2845 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -26,44 +26,20 @@ def setUp(self): self.init_kwargs = { "stackwise_expansion": [ 1, - 72.0 / 16, - 88.0 / 24, 4, 6, - 6, - 3, - 3, - 6, - 6, - 6, ], - "stackwise_filters": [16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], - "stackwise_kernel_size": [3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], - "stackwise_stride": [2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + "stackwise_filters": [4, 8, 16], + "stackwise_kernel_size": [3, 3, 5], + "stackwise_stride": [2, 2, 1], "stackwise_se_ratio": [ 0.25, None, - None, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, 0.25, ], "stackwise_activation": [ "relu", "relu", - "relu", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", "hard_swish", ], "include_rescaling": False, diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py index 0d2b91d740..a7b674ce67 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py @@ -46,7 +46,7 @@ class MobileNetV3ImageClassifier(ImageClassifier): "mobilenet_v3_small_imagenet") classifier.predict(images) ``` -\ + Custom backbone. ```python images = np.ones((2, 224, 224, 3), dtype="float32") diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py index f98abf6fb2..e9500ca853 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py @@ -31,44 +31,20 @@ def setUp(self): self.backbone = MobileNetV3Backbone( stackwise_expansion=[ 1, - 72.0 / 16, - 88.0 / 24, 4, 6, - 6, - 3, - 3, - 6, - 6, - 6, ], - stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], - stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], - stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], + stackwise_filters=[4, 8, 16], + stackwise_kernel_size=[3, 3, 5], + stackwise_stride=[2, 2, 1], stackwise_se_ratio=[ 0.25, None, - None, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, 0.25, ], stackwise_activation=[ "relu", "relu", - "relu", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", - "hard_swish", "hard_swish", ], include_rescaling=False, From 1ed96a9920ae371d35390479b410219d58a767e1 Mon Sep 17 00:00:00 2001 From: ushareng Date: Tue, 20 Aug 2024 12:34:16 +0530 Subject: [PATCH 11/28] refactoring and description added --- .../mobilenet_v3/mobilenet_v3_backbone.py | 26 ++++++++++++------- .../mobilenet_v3_backbone_test.py | 20 +++----------- .../mobilenet_v3_image_classifier.py | 14 +++++----- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index ae76abbd24..c803fab02b 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -60,22 +60,22 @@ class MobileNetV3Backbone(Backbone): input_data = tf.ones(shape=(8, 224, 224, 3)) # Randomly initialized backbone with a custom config + model = MobileNetV3Backbone( - stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], - stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], - stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], - stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], - stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], - stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], - include_rescaling=False, + "stackwise_expansion": [1, 4, 6], + "stackwise_filters"= [4, 8, 16], + "stackwise_kernel_size"= [3, 3, 5], + "stackwise_stride"= [2, 2, 1], + "stackwise_se_ratio"= [ 0.25, None, 0.25], + "stackwise_activation"= ["relu", "relu", "hard_swish"], + "include_rescaling"= False, ) output = model(input_data) ``` - """ # noqa: E501 + """ def __init__( self, - *, stackwise_expansion, stackwise_filters, stackwise_kernel_size, @@ -87,6 +87,7 @@ def __init__( alpha=1.0, **kwargs, ): + # === Functional Model === inputs = keras.layers.Input(shape=input_shape) x = inputs @@ -143,6 +144,7 @@ def __init__( super().__init__(inputs=inputs, outputs=x, **kwargs) + # === Config === self.stackwise_expansion = stackwise_expansion self.stackwise_filters = stackwise_filters self.stackwise_kernel_size = stackwise_kernel_size @@ -331,6 +333,12 @@ def SqueezeAndExcite2D( excite_activation="sigmoid", ): """ + Description: + This layer applies a content-aware mechanism to adaptively assign + channel-wise weights. It uses global average pooling to compress + feature maps into single values, which are then processed by + two Conv1D layers: the first reduces the dimensionality, and + the second restores it. Args: filters: Number of input and output filters. The number of input and output filters is same. diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py index cbef4f2845..295a7014e6 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py @@ -24,24 +24,12 @@ class MobileNetV3BackboneTest(TestCase): def setUp(self): self.init_kwargs = { - "stackwise_expansion": [ - 1, - 4, - 6, - ], + "stackwise_expansion": [1, 4, 6], "stackwise_filters": [4, 8, 16], "stackwise_kernel_size": [3, 3, 5], "stackwise_stride": [2, 2, 1], - "stackwise_se_ratio": [ - 0.25, - None, - 0.25, - ], - "stackwise_activation": [ - "relu", - "relu", - "hard_swish", - ], + "stackwise_se_ratio": [0.25, None, 0.25], + "stackwise_activation": ["relu", "relu", "hard_swish"], "include_rescaling": False, "input_shape": (224, 224, 3), "alpha": 1, @@ -53,7 +41,7 @@ def test_backbone_basics(self): cls=MobileNetV3Backbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - expected_output_shape=(2, 7, 7, 576), + expected_output_shape=(2, 28, 28, 96), run_mixed_precision_check=False, ) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py index a7b674ce67..2d8faa83d0 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py @@ -52,13 +52,13 @@ class MobileNetV3ImageClassifier(ImageClassifier): images = np.ones((2, 224, 224, 3), dtype="float32") labels = [0, 3] model = MobileNetV3Backbone( - stackwise_expansion=[1, 72.0 / 16, 88.0 / 24, 4, 6, 6, 3, 3, 6, 6, 6], - stackwise_filters=[16, 24, 24, 40, 40, 40, 48, 48, 96, 96, 96], - stackwise_kernel_size=[3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5], - stackwise_stride=[2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1], - stackwise_se_ratio=[0.25, None, None, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], - stackwise_activation=["relu", "relu", "relu", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], - include_rescaling=False, + "stackwise_expansion": [1, 4, 6], + "stackwise_filters"= [4, 8, 16], + "stackwise_kernel_size"= [3, 3, 5], + "stackwise_stride"= [2, 2, 1], + "stackwise_se_ratio"= [ 0.25, None, 0.25], + "stackwise_activation"= ["relu", "relu", "hard_swish"], + "include_rescaling"= False, ) classifier = keras_nlp.models.MobileNetV3ImageClassifier( backbone=backbone, From dd2554d53b8c9a3edbb614680800fb5b58b3b62a Mon Sep 17 00:00:00 2001 From: ushareng Date: Tue, 20 Aug 2024 18:51:59 +0530 Subject: [PATCH 12/28] comments updated --- .../models/mobilenet_v3/mobilenet_v3_backbone.py | 14 +++++++------- .../mobilenet_v3/mobilenet_v3_image_classifier.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py index c803fab02b..4dc532b5c3 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py @@ -62,13 +62,13 @@ class MobileNetV3Backbone(Backbone): # Randomly initialized backbone with a custom config model = MobileNetV3Backbone( - "stackwise_expansion": [1, 4, 6], - "stackwise_filters"= [4, 8, 16], - "stackwise_kernel_size"= [3, 3, 5], - "stackwise_stride"= [2, 2, 1], - "stackwise_se_ratio"= [ 0.25, None, 0.25], - "stackwise_activation"= ["relu", "relu", "hard_swish"], - "include_rescaling"= False, + stackwise_expansion = [1, 4, 6], + stackwise_filters = [4, 8, 16], + stackwise_kernel_size = [3, 3, 5], + stackwise_stride = [2, 2, 1], + stackwise_se_ratio = [ 0.25, None, 0.25], + stackwise_activation = ["relu", "relu", "hard_swish"], + include_rescaling = False, ) output = model(input_data) ``` diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py index 2d8faa83d0..77e677ce76 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py +++ b/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py @@ -52,13 +52,13 @@ class MobileNetV3ImageClassifier(ImageClassifier): images = np.ones((2, 224, 224, 3), dtype="float32") labels = [0, 3] model = MobileNetV3Backbone( - "stackwise_expansion": [1, 4, 6], - "stackwise_filters"= [4, 8, 16], - "stackwise_kernel_size"= [3, 3, 5], - "stackwise_stride"= [2, 2, 1], - "stackwise_se_ratio"= [ 0.25, None, 0.25], - "stackwise_activation"= ["relu", "relu", "hard_swish"], - "include_rescaling"= False, + stackwise_expansion = [1, 4, 6], + stackwise_filters = [4, 8, 16], + stackwise_kernel_size = [3, 3, 5], + stackwise_stride = [2, 2, 1], + stackwise_se_ratio = [ 0.25, None, 0.25], + stackwise_activation = ["relu", "relu", "hard_swish"], + include_rescaling = False, ) classifier = keras_nlp.models.MobileNetV3ImageClassifier( backbone=backbone, From 59cf9e3c15ad51fd1b2558a08e7d0d56a0e57384 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 02:52:38 +0530 Subject: [PATCH 13/28] added mobilenet v1 and v2 --- keras_nlp/api/models/__init__.py | 8 +- .../{mobilenet_v3 => mobilenet}/__init__.py | 0 .../mobilenet_backbone.py} | 112 +++++++++++++----- .../mobilenet_backbone_test.py} | 11 +- .../mobilenet_image_classifier.py} | 19 ++- .../mobilenet_image_classifier_test.py} | 17 ++- 6 files changed, 108 insertions(+), 59 deletions(-) rename keras_nlp/src/models/{mobilenet_v3 => mobilenet}/__init__.py (100%) rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_backbone.py => mobilenet/mobilenet_backbone.py} (79%) rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_backbone_test.py => mobilenet/mobilenet_backbone_test.py} (88%) rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_image_classifier.py => mobilenet/mobilenet_image_classifier.py} (86%) rename keras_nlp/src/models/{mobilenet_v3/mobilenet_v3_image_classifier_test.py => mobilenet/mobilenet_image_classifier_test.py} (84%) diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index f8dd88a688..c5ca6e3409 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -165,11 +165,9 @@ MistralPreprocessor, ) from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( - MobileNetV3Backbone, -) -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( - MobileNetV3ImageClassifier, +from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone +from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import ( + MobileNetImageClassifier, ) from keras_nlp.src.models.opt.opt_backbone import OPTBackbone from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM diff --git a/keras_nlp/src/models/mobilenet_v3/__init__.py b/keras_nlp/src/models/mobilenet/__init__.py similarity index 100% rename from keras_nlp/src/models/mobilenet_v3/__init__.py rename to keras_nlp/src/models/mobilenet/__init__.py diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py similarity index 79% rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py rename to keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 4dc532b5c3..b052c1eeda 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -22,11 +22,17 @@ BN_MOMENTUM = 0.999 -@keras_nlp_export("keras_nlp.models.MobileNetV3Backbone") -class MobileNetV3Backbone(Backbone): - """Instantiates the MobileNetV3 architecture. +@keras_nlp_export("keras_nlp.models.MobileNetBackbone") +class MobileNetBackbone(Backbone): + """Instantiates the MobileNet architecture. References: + - [MobileNets: Efficient Convolutional Neural Networks + for Mobile Vision Applications]( + https://arxiv.org/abs/1704.04861) + - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( + https://arxiv.org/abs/1801.04381) (CVPR 2018) + - [Based on the Original keras.applications MobileNetv2](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v2.py) - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019) - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py) @@ -46,7 +52,7 @@ class MobileNetV3Backbone(Backbone): layer. input_shape: optional shape tuple, defaults to (None, None, 3). alpha: float, controls the width of the network. This is known as the - depth multiplier in the MobileNetV3 paper, but the name is kept for + depth multiplier in the MobileNet paper, but the name is kept for consistency with MobileNetV1 in Keras. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. @@ -54,6 +60,7 @@ class MobileNetV3Backbone(Backbone): of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. + version: MobileNet version Example: ```python @@ -61,7 +68,7 @@ class MobileNetV3Backbone(Backbone): # Randomly initialized backbone with a custom config - model = MobileNetV3Backbone( + model = MobileNetBackbone( stackwise_expansion = [1, 4, 6], stackwise_filters = [4, 8, 16], stackwise_kernel_size = [3, 3, 5], @@ -69,6 +76,7 @@ class MobileNetV3Backbone(Backbone): stackwise_se_ratio = [ 0.25, None, 0.25], stackwise_activation = ["relu", "relu", "hard_swish"], include_rescaling = False, + version = 'v3' ) output = model(input_data) ``` @@ -85,17 +93,32 @@ def __init__( include_rescaling, input_shape=(224, 224, 3), alpha=1.0, + version="v3", **kwargs, ): # === Functional Model === + if version not in ["v1", "v2", "v3"]: + raise ValueError( + "The `version` argument should be either `v1` (for MobileNet)" + "or `v2` ( for MobileNetV2)" + "or v3 (MobileNetV3), default version is `v3`" + f"Received `version={version}`" + ) inputs = keras.layers.Input(shape=input_shape) x = inputs if include_rescaling: x = keras.layers.Rescaling(scale=1 / 255)(x) + first_ch = ( + 32 + if version == "v1" + else ( + adjust_channels(32 * alpha) if version == "v2" else 16 + ) # This is for 'v3' + ) x = keras.layers.Conv2D( - 16, + first_ch, kernel_size=3, strides=(2, 2), padding="same", @@ -108,7 +131,11 @@ def __init__( momentum=BN_MOMENTUM, name="Conv_BatchNorm", )(x) - x = apply_hard_swish(x) + + if version == "v3": + x = apply_hard_swish(x) + else: + x = keras.layers.ReLU(6.0)(x) for stack_index in range(len(stackwise_filters)): @@ -120,27 +147,41 @@ def __init__( ), kernel_size=stackwise_kernel_size[stack_index], stride=stackwise_stride[stack_index], - se_ratio=stackwise_se_ratio[stack_index], + se_ratio=( + stackwise_se_ratio[stack_index] if version == "v3" else 0 + ), activation=stackwise_activation[stack_index], - expansion_index=stack_index, + expansion_index=0 if version == "v1" else stack_index, + version=version, ) - last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) - - x = keras.layers.Conv2D( - last_conv_ch, - kernel_size=1, - padding="same", - use_bias=False, - name="Conv_1", - )(x) - x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name="Conv_1_BatchNorm", - )(x) - x = apply_hard_swish(x) + if version == "v3": + last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) + elif version == "v2": + if alpha > 1.0: + last_conv_ch = adjust_channels(1280 * alpha) + else: + last_conv_ch = 1280 + + if version != "v1": + x = keras.layers.Conv2D( + last_conv_ch, + kernel_size=1, + padding="same", + use_bias=False, + name="Conv_1", + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name="Conv_1_BatchNorm", + )(x) + + if version == "v3": + x = apply_hard_swish(x) + else: + x = keras.layers.ReLU(6.0)(x) super().__init__(inputs=inputs, outputs=x, **kwargs) @@ -153,6 +194,7 @@ def __init__( self.stackwise_activation = stackwise_activation self.include_rescaling = include_rescaling self.alpha = alpha + self.version = version def get_config(self): config = super().get_config() @@ -167,6 +209,7 @@ def get_config(self): "include_rescaling": self.include_rescaling, "input_shape": self.input_shape[1:], "alpha": self.alpha, + "version": self.version, } ) return config @@ -226,6 +269,7 @@ def apply_inverted_res_block( se_ratio, activation, expansion_index, + version="v3", ): """An Inverted Residual Block. @@ -242,6 +286,7 @@ def apply_inverted_res_block( expansion_index: integer, a unique identification if you want to use expanded convolutions. If greater than 0, an additional Conv+BN layer is added after the expanded convolutional layer. + version: MobileNet architecture version, v1, v2 or v3 Returns: the updated input tensor. @@ -272,7 +317,10 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "expand_BatchNorm", )(x) - x = activation(x) + if version == "v3": + x = activation(x) + else: + x = keras.layers.ReLU(6.0)(x) if stride == 2: x = keras.layers.ZeroPadding2D( @@ -293,7 +341,10 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "depthwise_BatchNorm", )(x) - x = activation(x) + if version == "v3": + x = activation(x) + else: + x = keras.layers.ReLU(6.0)(x) if se_ratio: se_filters = adjust_channels(infilters * expansion) @@ -319,8 +370,11 @@ def apply_inverted_res_block( name=prefix + "project_BatchNorm", )(x) - if stride == 1 and infilters == filters: - x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) + if version == "v1": + x = keras.layers.ReLU(6.0)(x) + else: + if stride == 1 and infilters == filters: + x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) return x diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py similarity index 88% rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py rename to keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 295a7014e6..68671c6b3e 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -15,13 +15,11 @@ import numpy as np import pytest -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( - MobileNetV3Backbone, -) +from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone from keras_nlp.src.tests.test_case import TestCase -class MobileNetV3BackboneTest(TestCase): +class MobileNetBackboneTest(TestCase): def setUp(self): self.init_kwargs = { "stackwise_expansion": [1, 4, 6], @@ -33,12 +31,13 @@ def setUp(self): "include_rescaling": False, "input_shape": (224, 224, 3), "alpha": 1, + "version": "v3", } self.input_data = np.ones((2, 224, 224, 3), dtype="float32") def test_backbone_basics(self): self.run_backbone_test( - cls=MobileNetV3Backbone, + cls=MobileNetBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, expected_output_shape=(2, 28, 28, 96), @@ -48,7 +47,7 @@ def test_backbone_basics(self): @pytest.mark.large def test_saved_model(self): self.run_model_saving_test( - cls=MobileNetV3Backbone, + cls=MobileNetBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, ) diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py similarity index 86% rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py rename to keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py index 77e677ce76..ed6239ee26 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py @@ -15,13 +15,11 @@ from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.image_classifier import ImageClassifier -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( - MobileNetV3Backbone, -) +from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone -@keras_nlp_export("keras_nlp.models.MobileNetV3ImageClassifier") -class MobileNetV3ImageClassifier(ImageClassifier): +@keras_nlp_export("keras_nlp.models.MobileNetImageClassifier") +class MobileNetImageClassifier(ImageClassifier): """MobileNetV3 image classifier task model. To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` @@ -30,7 +28,7 @@ class MobileNetV3ImageClassifier(ImageClassifier): be used to load a pre-trained config and weights. Args: - backbone: A `keras_nlp.models.MobileNetV3Backbone` instance. + backbone: A `keras_nlp.models.MobileNetBackbone` instance. num_classes: int. The number of classes to predict. activation: `None`, str or callable. The activation function to use on the `Dense` layer. Set `activation=None` to return the output @@ -42,7 +40,7 @@ class MobileNetV3ImageClassifier(ImageClassifier): ```python # Load preset and train images = np.ones((2, 224, 224, 3), dtype="float32") - classifier = keras_nlp.models.MobileNetV3ImageClassifier.from_preset( + classifier = keras_nlp.models.MobileNetImageClassifier.from_preset( "mobilenet_v3_small_imagenet") classifier.predict(images) ``` @@ -51,7 +49,7 @@ class MobileNetV3ImageClassifier(ImageClassifier): ```python images = np.ones((2, 224, 224, 3), dtype="float32") labels = [0, 3] - model = MobileNetV3Backbone( + model = MobileNetBackbone( stackwise_expansion = [1, 4, 6], stackwise_filters = [4, 8, 16], stackwise_kernel_size = [3, 3, 5], @@ -59,8 +57,9 @@ class MobileNetV3ImageClassifier(ImageClassifier): stackwise_se_ratio = [ 0.25, None, 0.25], stackwise_activation = ["relu", "relu", "hard_swish"], include_rescaling = False, + "version"="v3", ) - classifier = keras_nlp.models.MobileNetV3ImageClassifier( + classifier = keras_nlp.models.MobileNetImageClassifier( backbone=backbone, num_classes=4, ) @@ -68,7 +67,7 @@ class MobileNetV3ImageClassifier(ImageClassifier): ``` """ - backbone_cls = MobileNetV3Backbone + backbone_cls = MobileNetBackbone def __init__( self, diff --git a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py similarity index 84% rename from keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py rename to keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index e9500ca853..d556478840 100644 --- a/keras_nlp/src/models/mobilenet_v3/mobilenet_v3_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -14,21 +14,19 @@ import numpy as np import pytest -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_backbone import ( - MobileNetV3Backbone, -) -from keras_nlp.src.models.mobilenet_v3.mobilenet_v3_image_classifier import ( - MobileNetV3ImageClassifier, +from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone +from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import ( + MobileNetImageClassifier, ) from keras_nlp.src.tests.test_case import TestCase -class MobileNetV3ImageClassifierTest(TestCase): +class MobileNetImageClassifierTest(TestCase): def setUp(self): # Setup model. self.images = np.ones((2, 224, 224, 3), dtype="float32") self.labels = [0, 3] - self.backbone = MobileNetV3Backbone( + self.backbone = MobileNetBackbone( stackwise_expansion=[ 1, 4, @@ -49,6 +47,7 @@ def setUp(self): ], include_rescaling=False, input_shape=(224, 224, 3), + version="v3", ) self.init_kwargs = { "backbone": self.backbone, @@ -65,7 +64,7 @@ def test_classifier_basics(self): reason="TODO: enable after preprocessor flow is figured out" ) self.run_task_test( - cls=MobileNetV3ImageClassifier, + cls=MobileNetImageClassifier, init_kwargs=self.init_kwargs, train_data=self.train_data, expected_output_shape=(2, 2), @@ -74,7 +73,7 @@ def test_classifier_basics(self): @pytest.mark.large def test_saved_model(self): self.run_model_saving_test( - cls=MobileNetV3ImageClassifier, + cls=MobileNetImageClassifier, init_kwargs=self.init_kwargs, input_data=self.images, ) From 091752e17a5695d3af6e3108165f6fd11867a2d9 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 03:08:56 +0530 Subject: [PATCH 14/28] merge conflict resolved --- keras_nlp/api/models/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_nlp/api/models/__init__.py b/keras_nlp/api/models/__init__.py index de678ea262..17b00c1f05 100644 --- a/keras_nlp/api/models/__init__.py +++ b/keras_nlp/api/models/__init__.py @@ -165,16 +165,16 @@ MistralPreprocessor, ) from keras_nlp.src.models.mistral.mistral_tokenizer import MistralTokenizer -from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone -from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import ( - MobileNetImageClassifier, -) from keras_nlp.src.models.mix_transformer.mix_transformer_backbone import ( MiTBackbone, ) from keras_nlp.src.models.mix_transformer.mix_transformer_classifier import ( MiTImageClassifier, ) +from keras_nlp.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone +from keras_nlp.src.models.mobilenet.mobilenet_image_classifier import ( + MobileNetImageClassifier, +) from keras_nlp.src.models.opt.opt_backbone import OPTBackbone from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import ( From eeecee6edda58eb6fc719b986ed5dc734ec2df97 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 15:02:23 +0530 Subject: [PATCH 15/28] version arg removed, and config options added --- .../models/mobilenet/mobilenet_backbone.py | 265 ++++++++++++------ .../mobilenet/mobilenet_backbone_test.py | 7 +- .../mobilenet/mobilenet_image_classifier.py | 4 +- .../mobilenet_image_classifier_test.py | 23 +- 4 files changed, 195 insertions(+), 104 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index b052c1eeda..ecce30f6a2 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -25,6 +25,12 @@ @keras_nlp_export("keras_nlp.models.MobileNetBackbone") class MobileNetBackbone(Backbone): """Instantiates the MobileNet architecture. + MobileNet is a lightweight convolutional neural network (CNN) + optimized for mobile and edge devices, striking a balance between + accuracy and efficiency. By employing depthwise separable convolutions + and techniques like Squeeze-and-Excitation (SE) blocks in later versions, + MobileNet models are highly suitable for real-time applications on + resource-constrained devices. References: - [MobileNets: Efficient Convolutional Neural Networks @@ -32,13 +38,8 @@ class MobileNetBackbone(Backbone): https://arxiv.org/abs/1704.04861) - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( https://arxiv.org/abs/1801.04381) (CVPR 2018) - - [Based on the Original keras.applications MobileNetv2](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v2.py) - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019) - - [Based on the Original keras.applications MobileNetv3](https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet_v3.py) - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). Args: stackwise_expansion: list of ints or floats, the expansion ratio for @@ -50,17 +51,25 @@ class MobileNetBackbone(Backbone): include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(scale=1 / 255)` layer. - input_shape: optional shape tuple, defaults to (None, None, 3). - alpha: float, controls the width of the network. This is known as the - depth multiplier in the MobileNet paper, but the name is kept for - consistency with MobileNetV1 in Keras. - - If `alpha` < 1.0, proportionally decreases the number + image_shape: optional shape tuple, defaults to (None, None, 3). + depth_multiplier: float, controls the width of the network. + - If `depth_multiplier` < 1.0, proportionally decreases the number of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number + - If `depth_multiplier` > 1.0, proportionally increases the number of filters in each layer. - - If `alpha` = 1, default number of filters from the paper + - If `depth_multiplier` = 1, default number of filters from the paper are used at each layer. - version: MobileNet version + input_filter: number of filters in first convolution layer + output_filter: specifies whether to add conv and batch_norm in the end, + if set to None, it will not add these layers in the end. + 'None' for MobileNetV1 + activation: activation function to be used + 'hard_swish' for MobileNetV3, + 'relu6' for MobileNetV1 and MobileNetV2 + inverted_res_block: whether to use inverted residual blocks or not, + 'False' for MobileNetV1, + 'True' for MobileNetV2 and MobileNetV3 + Example: ```python @@ -74,9 +83,12 @@ class MobileNetBackbone(Backbone): stackwise_kernel_size = [3, 3, 5], stackwise_stride = [2, 2, 1], stackwise_se_ratio = [ 0.25, None, 0.25], - stackwise_activation = ["relu", "relu", "hard_swish"], + stackwise_activation = ["relu", "relu6", "hard_swish"], include_rescaling = False, - version = 'v3' + output_filter=1280, + activation="hard_swish", + inverted_res_block=True, + ) output = model(input_data) ``` @@ -91,34 +103,34 @@ def __init__( stackwise_se_ratio, stackwise_activation, include_rescaling, - input_shape=(224, 224, 3), - alpha=1.0, - version="v3", + output_filter, + activation, + inverted_res_block, + depth_multiplier=1.0, + input_filter=16, + image_shape=(224, 224, 3), **kwargs, ): + activation_str = activation + if isinstance(activation, str): + if activation == "hard_swish": + activation = apply_hard_swish + elif activation == "relu6": + activation = apply_relu6 + else: + activation = keras.activations.get(activation) + # === Functional Model === - if version not in ["v1", "v2", "v3"]: - raise ValueError( - "The `version` argument should be either `v1` (for MobileNet)" - "or `v2` ( for MobileNetV2)" - "or v3 (MobileNetV3), default version is `v3`" - f"Received `version={version}`" - ) - inputs = keras.layers.Input(shape=input_shape) + + inputs = keras.layers.Input(shape=image_shape) x = inputs if include_rescaling: x = keras.layers.Rescaling(scale=1 / 255)(x) - first_ch = ( - 32 - if version == "v1" - else ( - adjust_channels(32 * alpha) if version == "v2" else 16 - ) # This is for 'v3' - ) + input_filter = adjust_channels(input_filter) x = keras.layers.Conv2D( - first_ch, + input_filter, kernel_size=3, strides=(2, 2), padding="same", @@ -132,38 +144,45 @@ def __init__( name="Conv_BatchNorm", )(x) - if version == "v3": - x = apply_hard_swish(x) - else: - x = keras.layers.ReLU(6.0)(x) + x = activation(x) for stack_index in range(len(stackwise_filters)): - x = apply_inverted_res_block( - x, - expansion=stackwise_expansion[stack_index], - filters=adjust_channels( - (stackwise_filters[stack_index]) * alpha - ), - kernel_size=stackwise_kernel_size[stack_index], - stride=stackwise_stride[stack_index], - se_ratio=( - stackwise_se_ratio[stack_index] if version == "v3" else 0 - ), - activation=stackwise_activation[stack_index], - expansion_index=0 if version == "v1" else stack_index, - version=version, - ) - - if version == "v3": - last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) - elif version == "v2": - if alpha > 1.0: - last_conv_ch = adjust_channels(1280 * alpha) + if inverted_res_block: + x = apply_inverted_res_block( + x, + expansion=stackwise_expansion[stack_index], + filters=adjust_channels( + (stackwise_filters[stack_index]) * depth_multiplier + ), + kernel_size=stackwise_kernel_size[stack_index], + stride=stackwise_stride[stack_index], + se_ratio=( + stackwise_se_ratio[stack_index] + if activation_str == "hard_swish" + else 0 + ), + activation=stackwise_activation[stack_index], + expansion_index=stack_index, + ) + else: + x = apply_depthwise_conv_block( + x, + filters=adjust_channels( + (stackwise_filters[stack_index]) * depth_multiplier + ), + kernel_size=3, + stride=stackwise_stride[stack_index], + depth_multiplier=depth_multiplier, + block_id=stack_index, + ) + + if output_filter is not None: + if activation_str == "hard_swish": + last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) else: - last_conv_ch = 1280 + last_conv_ch = output_filter - if version != "v1": x = keras.layers.Conv2D( last_conv_ch, kernel_size=1, @@ -178,10 +197,7 @@ def __init__( name="Conv_1_BatchNorm", )(x) - if version == "v3": - x = apply_hard_swish(x) - else: - x = keras.layers.ReLU(6.0)(x) + x = activation(x) super().__init__(inputs=inputs, outputs=x, **kwargs) @@ -193,8 +209,12 @@ def __init__( self.stackwise_se_ratio = stackwise_se_ratio self.stackwise_activation = stackwise_activation self.include_rescaling = include_rescaling - self.alpha = alpha - self.version = version + self.depth_multiplier = depth_multiplier + self.input_filter = input_filter + self.output_filter = output_filter + self.activation = activation + self.inverted_res_block = inverted_res_block + self.image_shape = image_shape[1:] def get_config(self): config = super().get_config() @@ -207,9 +227,12 @@ def get_config(self): "stackwise_se_ratio": self.stackwise_se_ratio, "stackwise_activation": self.stackwise_activation, "include_rescaling": self.include_rescaling, - "input_shape": self.input_shape[1:], - "alpha": self.alpha, - "version": self.version, + "image_shape": self.image_shape, + "depth_multiplier": self.depth_multiplier, + "input_filter": self.input_filter, + "output_filter": self.output_filter, + "activation": self.activation, + "inverted_res_block": self.inverted_res_block, } ) return config @@ -260,6 +283,10 @@ def apply_hard_swish(x): return keras.layers.Multiply()([x, apply_hard_sigmoid(x)]) +def apply_relu6(x): + return keras.layers.ReLU(6.0)(x) + + def apply_inverted_res_block( x, expansion, @@ -269,7 +296,6 @@ def apply_inverted_res_block( se_ratio, activation, expansion_index, - version="v3", ): """An Inverted Residual Block. @@ -286,7 +312,6 @@ def apply_inverted_res_block( expansion_index: integer, a unique identification if you want to use expanded convolutions. If greater than 0, an additional Conv+BN layer is added after the expanded convolutional layer. - version: MobileNet architecture version, v1, v2 or v3 Returns: the updated input tensor. @@ -294,6 +319,8 @@ def apply_inverted_res_block( if isinstance(activation, str): if activation == "hard_swish": activation = apply_hard_swish + elif activation == "relu6": + activation = apply_relu6 else: activation = keras.activations.get(activation) @@ -317,10 +344,7 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "expand_BatchNorm", )(x) - if version == "v3": - x = activation(x) - else: - x = keras.layers.ReLU(6.0)(x) + x = activation(x) if stride == 2: x = keras.layers.ZeroPadding2D( @@ -341,10 +365,7 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "depthwise_BatchNorm", )(x) - if version == "v3": - x = activation(x) - else: - x = keras.layers.ReLU(6.0)(x) + x = activation(x) if se_ratio: se_filters = adjust_channels(infilters * expansion) @@ -370,11 +391,85 @@ def apply_inverted_res_block( name=prefix + "project_BatchNorm", )(x) - if version == "v1": - x = keras.layers.ReLU(6.0)(x) - else: - if stride == 1 and infilters == filters: - x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) + if stride == 1 and infilters == filters: + x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) + + return x + + +def apply_depthwise_conv_block( + x, + filters, + kernel_size=3, + depth_multiplier=1, + stride=1, + block_id=1, +): + """Adds a depthwise convolution block. + + A depthwise convolution block consists of a depthwise conv, + batch normalization, relu6, pointwise convolution, + batch normalization and relu6 activation. + + Args: + x: Input tensor of shape `(rows, cols, channels) + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the pointwise convolution). + depth_multiplier: controls the width of the network. - If `depth_multiplier` < 1.0, + proportionally decreases the number of filters in each layer. + - If `depth_multiplier` > 1.0, proportionally increases the number of filters + in each layer. + - If `depth_multiplier` = 1, default number of filters from the paper are + used at each layer. + strides: An integer or tuple/list of 2 integers, specifying the strides + of the convolution along the width and height. + Can be a single integer to specify the same value for + all spatial dimensions. Specifying any stride value != 1 is + incompatible with specifying any `dilation_rate` value != 1. + block_id: Integer, a unique identification designating the block number. + + Input shape: + 4D tensor with shape: `(batch, rows, cols, channels)` + Returns: + Output tensor of block. + """ + + if stride == 2: + x = keras.layers.ZeroPadding2D( + padding=correct_pad_downsample(x, kernel_size), + name="conv_pad_%d" % block_id, + )(x) + + x = keras.layers.DepthwiseConv2D( + kernel_size, + strides=stride, + padding="same" if stride == 1 else "valid", + depth_multiplier=depth_multiplier, + use_bias=False, + name="depthwise_%d" % block_id, + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name="depthwise_BatchNorm_%d" % block_id, + )(x) + x = keras.layers.ReLU(6.0)(x) + + x = keras.layers.Conv2D( + filters, + kernel_size=1, + padding="same", + use_bias=False, + name="conv_%d" % block_id, + )(x) + x = keras.layers.BatchNormalization( + axis=CHANNEL_AXIS, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name="BatchNorm_%d" % block_id, + )(x) + x = keras.layers.ReLU(6.0)(x) return x diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 68671c6b3e..98bac8feee 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -29,9 +29,12 @@ def setUp(self): "stackwise_se_ratio": [0.25, None, 0.25], "stackwise_activation": ["relu", "relu", "hard_swish"], "include_rescaling": False, + "output_filter": 1280, + "activation": "hard_swish", + "inverted_res_block": True, + "input_filter": 16, "input_shape": (224, 224, 3), - "alpha": 1, - "version": "v3", + "depth_multiplier": 1, } self.input_data = np.ones((2, 224, 224, 3), dtype="float32") diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py index ed6239ee26..3e08f3482c 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier.py @@ -57,7 +57,9 @@ class MobileNetImageClassifier(ImageClassifier): stackwise_se_ratio = [ 0.25, None, 0.25], stackwise_activation = ["relu", "relu", "hard_swish"], include_rescaling = False, - "version"="v3", + output_filter=1280, + activation="hard_swish", + inverted_res_block=True, ) classifier = keras_nlp.models.MobileNetImageClassifier( backbone=backbone, diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index d556478840..24c9f5065e 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -27,27 +27,18 @@ def setUp(self): self.images = np.ones((2, 224, 224, 3), dtype="float32") self.labels = [0, 3] self.backbone = MobileNetBackbone( - stackwise_expansion=[ - 1, - 4, - 6, - ], + stackwise_expansion=[1, 4, 6], stackwise_filters=[4, 8, 16], stackwise_kernel_size=[3, 3, 5], stackwise_stride=[2, 2, 1], - stackwise_se_ratio=[ - 0.25, - None, - 0.25, - ], - stackwise_activation=[ - "relu", - "relu", - "hard_swish", - ], + stackwise_se_ratio=[0.25, None, 0.25], + stackwise_activation=["relu", "relu", "hard_swish"], include_rescaling=False, + output_filter=1280, + activation="hard_swish", + inverted_res_block=True, + input_filter=16, input_shape=(224, 224, 3), - version="v3", ) self.init_kwargs = { "backbone": self.backbone, From b442f7e145eea230354f1e8bf80259741a1ff127 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 15:23:53 +0530 Subject: [PATCH 16/28] input_shape changed to image_shape in arg --- keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py | 2 +- .../src/models/mobilenet/mobilenet_image_classifier_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 98bac8feee..8b89285729 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -33,7 +33,7 @@ def setUp(self): "activation": "hard_swish", "inverted_res_block": True, "input_filter": 16, - "input_shape": (224, 224, 3), + "image_shape": (224, 224, 3), "depth_multiplier": 1, } self.input_data = np.ones((2, 224, 224, 3), dtype="float32") diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index 24c9f5065e..88b71f44a4 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -38,7 +38,7 @@ def setUp(self): activation="hard_swish", inverted_res_block=True, input_filter=16, - input_shape=(224, 224, 3), + image_shape=(224, 224, 3), ) self.init_kwargs = { "backbone": self.backbone, From 5e967314ea2324bc502ba25c3986ebb7eeda0d70 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 15:57:19 +0530 Subject: [PATCH 17/28] config updated --- keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index ecce30f6a2..d137cd1b40 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -212,7 +212,7 @@ def __init__( self.depth_multiplier = depth_multiplier self.input_filter = input_filter self.output_filter = output_filter - self.activation = activation + self.activation = activation_str self.inverted_res_block = inverted_res_block self.image_shape = image_shape[1:] From 3108c7e28a9c845ae27ff639c65bed88582b1105 Mon Sep 17 00:00:00 2001 From: ushareng Date: Wed, 21 Aug 2024 16:38:48 +0530 Subject: [PATCH 18/28] input shape corrected --- keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index d137cd1b40..fdd791bfed 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -214,7 +214,7 @@ def __init__( self.output_filter = output_filter self.activation = activation_str self.inverted_res_block = inverted_res_block - self.image_shape = image_shape[1:] + self.image_shape = image_shape def get_config(self): config = super().get_config() From 066b6ab3e3a559cdfff4f24a0c8e029ccd7044d5 Mon Sep 17 00:00:00 2001 From: ushareng Date: Thu, 22 Aug 2024 16:16:41 +0530 Subject: [PATCH 19/28] comments resolved --- .../models/mobilenet/mobilenet_backbone.py | 107 ++++++------------ .../mobilenet/mobilenet_backbone_test.py | 5 +- .../mobilenet_image_classifier_test.py | 5 +- 3 files changed, 38 insertions(+), 79 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index fdd791bfed..2892d2ca02 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -25,6 +25,7 @@ @keras_nlp_export("keras_nlp.models.MobileNetBackbone") class MobileNetBackbone(Backbone): """Instantiates the MobileNet architecture. + MobileNet is a lightweight convolutional neural network (CNN) optimized for mobile and edge devices, striking a balance between accuracy and efficiency. By employing depthwise separable convolutions @@ -51,7 +52,7 @@ class MobileNetBackbone(Backbone): include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(scale=1 / 255)` layer. - image_shape: optional shape tuple, defaults to (None, None, 3). + image_shape: optional shape tuple, defaults to (224, 224, 3). depth_multiplier: float, controls the width of the network. - If `depth_multiplier` < 1.0, proportionally decreases the number of filters in each layer. @@ -76,15 +77,14 @@ class MobileNetBackbone(Backbone): input_data = tf.ones(shape=(8, 224, 224, 3)) # Randomly initialized backbone with a custom config - model = MobileNetBackbone( - stackwise_expansion = [1, 4, 6], - stackwise_filters = [4, 8, 16], - stackwise_kernel_size = [3, 3, 5], - stackwise_stride = [2, 2, 1], - stackwise_se_ratio = [ 0.25, None, 0.25], - stackwise_activation = ["relu", "relu6", "hard_swish"], - include_rescaling = False, + stackwise_expansion=[1, 4, 6], + stackwise_filters=[4, 8, 16], + stackwise_kernel_size=[3, 3, 5], + stackwise_stride=[2, 2, 1], + stackwise_se_ratio=[0.25, None, 0.25], + stackwise_activation=["relu", "relu6", "hard_swish"], + include_rescaling=False, output_filter=1280, activation="hard_swish", inverted_res_block=True, @@ -104,21 +104,16 @@ def __init__( stackwise_activation, include_rescaling, output_filter, - activation, inverted_res_block, + activation=keras.activations.hard_swish, depth_multiplier=1.0, - input_filter=16, + input_filters=16, image_shape=(224, 224, 3), **kwargs, ): activation_str = activation - if isinstance(activation, str): - if activation == "hard_swish": - activation = apply_hard_swish - elif activation == "relu6": - activation = apply_relu6 - else: - activation = keras.activations.get(activation) + + activation = keras.activations.get(activation) # === Functional Model === @@ -128,20 +123,20 @@ def __init__( if include_rescaling: x = keras.layers.Rescaling(scale=1 / 255)(x) - input_filter = adjust_channels(input_filter) + input_filters = adjust_channels(input_filters) x = keras.layers.Conv2D( - input_filter, + input_filters, kernel_size=3, strides=(2, 2), padding="same", use_bias=False, - name="Conv", + name="input_conv", )(x) x = keras.layers.BatchNormalization( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv_BatchNorm", + name="input_batch_norm", )(x) x = activation(x) @@ -157,11 +152,7 @@ def __init__( ), kernel_size=stackwise_kernel_size[stack_index], stride=stackwise_stride[stack_index], - se_ratio=( - stackwise_se_ratio[stack_index] - if activation_str == "hard_swish" - else 0 - ), + se_ratio=(stackwise_se_ratio[stack_index]), activation=stackwise_activation[stack_index], expansion_index=stack_index, ) @@ -178,23 +169,20 @@ def __init__( ) if output_filter is not None: - if activation_str == "hard_swish": - last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) - else: - last_conv_ch = output_filter + last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) x = keras.layers.Conv2D( last_conv_ch, kernel_size=1, padding="same", use_bias=False, - name="Conv_1", + name="output_conv", )(x) x = keras.layers.BatchNormalization( axis=CHANNEL_AXIS, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, - name="Conv_1_BatchNorm", + name="output_batch_norm", )(x) x = activation(x) @@ -210,7 +198,7 @@ def __init__( self.stackwise_activation = stackwise_activation self.include_rescaling = include_rescaling self.depth_multiplier = depth_multiplier - self.input_filter = input_filter + self.input_filters = input_filters self.output_filter = output_filter self.activation = activation_str self.inverted_res_block = inverted_res_block @@ -229,7 +217,7 @@ def get_config(self): "include_rescaling": self.include_rescaling, "image_shape": self.image_shape, "depth_multiplier": self.depth_multiplier, - "input_filter": self.input_filter, + "input_filters": self.input_filters, "output_filter": self.output_filter, "activation": self.activation, "inverted_res_block": self.inverted_res_block, @@ -238,12 +226,12 @@ def get_config(self): return config -class HardSigmoidActivation(keras.layers.Layer): +class HardSigmoidActivation: def __init__(self): super().__init__() def call(self, x): - return apply_hard_sigmoid(x) + return keras.activations.hard_sigmoid(x) def get_config(self): return super().get_config() @@ -274,19 +262,6 @@ def adjust_channels(x, divisor=8, min_value=None): return new_x -def apply_hard_sigmoid(x): - activation = keras.layers.ReLU(6.0) - return activation(x + 3.0) * (1.0 / 6.0) - - -def apply_hard_swish(x): - return keras.layers.Multiply()([x, apply_hard_sigmoid(x)]) - - -def apply_relu6(x): - return keras.layers.ReLU(6.0)(x) - - def apply_inverted_res_block( x, expansion, @@ -316,14 +291,7 @@ def apply_inverted_res_block( Returns: the updated input tensor. """ - if isinstance(activation, str): - if activation == "hard_swish": - activation = apply_hard_swish - elif activation == "relu6": - activation = apply_relu6 - else: - activation = keras.activations.get(activation) - + activation = keras.activations.get(activation) shortcut = x prefix = "expanded_conv_" infilters = x.shape[CHANNEL_AXIS] @@ -370,11 +338,11 @@ def apply_inverted_res_block( if se_ratio: se_filters = adjust_channels(infilters * expansion) x = SqueezeAndExcite2D( - x, - se_filters, - adjust_channels(se_filters * se_ratio), - "relu", - HardSigmoidActivation(), + input=x, + filters=se_filters, + bottleneck_filters=adjust_channels(se_filters * se_ratio), + squeeze_activation="relu", + excite_activation=HardSigmoidActivation(), ) x = keras.layers.Conv2D( @@ -469,9 +437,7 @@ def apply_depthwise_conv_block( momentum=BN_MOMENTUM, name="BatchNorm_%d" % block_id, )(x) - x = keras.layers.ReLU(6.0)(x) - - return x + return keras.layers.ReLU(6.0)(x) def SqueezeAndExcite2D( @@ -501,15 +467,6 @@ def SqueezeAndExcite2D( keras.layers.Layer) or keras.activations.Activation instance denoting activation to be applied after excite convolution. Defaults to `sigmoid`. - Example: - - ```python - # (...) - input = tf.ones((1, 5, 5, 16), dtype=tf.float32) - x = keras.layers.Conv2D(16, (3, 3))(input) - - # (...) - ``` """ if not bottleneck_filters: bottleneck_filters = filters // 4 diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 8b89285729..c5b3790366 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import keras import numpy as np import pytest @@ -30,9 +31,9 @@ def setUp(self): "stackwise_activation": ["relu", "relu", "hard_swish"], "include_rescaling": False, "output_filter": 1280, - "activation": "hard_swish", + "activation": keras.activations.hard_swish, "inverted_res_block": True, - "input_filter": 16, + "input_filters": 16, "image_shape": (224, 224, 3), "depth_multiplier": 1, } diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index 88b71f44a4..0a561056e4 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import keras import numpy as np import pytest @@ -35,9 +36,9 @@ def setUp(self): stackwise_activation=["relu", "relu", "hard_swish"], include_rescaling=False, output_filter=1280, - activation="hard_swish", + activation=keras.activations.hard_swish, inverted_res_block=True, - input_filter=16, + input_filters=16, image_shape=(224, 224, 3), ) self.init_kwargs = { From 70b5f9eb68387681f9e37aab44a79a081f49a478 Mon Sep 17 00:00:00 2001 From: ushareng Date: Thu, 22 Aug 2024 20:41:30 +0530 Subject: [PATCH 20/28] activation function format changed --- .../src/models/mobilenet/mobilenet_backbone.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 2892d2ca02..8bf9255aaf 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -86,7 +86,7 @@ class MobileNetBackbone(Backbone): stackwise_activation=["relu", "relu6", "hard_swish"], include_rescaling=False, output_filter=1280, - activation="hard_swish", + activation=keras.activations.hard_swish, inverted_res_block=True, ) @@ -111,10 +111,6 @@ def __init__( image_shape=(224, 224, 3), **kwargs, ): - activation_str = activation - - activation = keras.activations.get(activation) - # === Functional Model === inputs = keras.layers.Input(shape=image_shape) @@ -139,7 +135,9 @@ def __init__( name="input_batch_norm", )(x) - x = activation(x) + x = keras.layers.Activation( + activation, + )(x) for stack_index in range(len(stackwise_filters)): @@ -200,7 +198,7 @@ def __init__( self.depth_multiplier = depth_multiplier self.input_filters = input_filters self.output_filter = output_filter - self.activation = activation_str + self.activation = keras.activations.serialize(activation=activation) self.inverted_res_block = inverted_res_block self.image_shape = image_shape @@ -312,7 +310,7 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "expand_BatchNorm", )(x) - x = activation(x) + x = keras.layers.Activation(activation=activation)(x) if stride == 2: x = keras.layers.ZeroPadding2D( @@ -333,7 +331,7 @@ def apply_inverted_res_block( momentum=BN_MOMENTUM, name=prefix + "depthwise_BatchNorm", )(x) - x = activation(x) + x = keras.layers.Activation(activation=activation)(x) if se_ratio: se_filters = adjust_channels(infilters * expansion) From 2f8ba292fe06893b25221f90ad2c204b25c07d88 Mon Sep 17 00:00:00 2001 From: ushareng Date: Thu, 22 Aug 2024 21:09:30 +0530 Subject: [PATCH 21/28] minor bug fixed --- .../models/mobilenet/mobilenet_backbone.py | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 8bf9255aaf..820a3295de 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -182,8 +182,7 @@ def __init__( momentum=BN_MOMENTUM, name="output_batch_norm", )(x) - - x = activation(x) + x = keras.layers.Activation(activation)(x) super().__init__(inputs=inputs, outputs=x, **kwargs) @@ -198,7 +197,7 @@ def __init__( self.depth_multiplier = depth_multiplier self.input_filters = input_filters self.output_filter = output_filter - self.activation = keras.activations.serialize(activation=activation) + self.activation = keras.activations.get(activation=activation) self.inverted_res_block = inverted_res_block self.image_shape = image_shape @@ -217,24 +216,15 @@ def get_config(self): "depth_multiplier": self.depth_multiplier, "input_filters": self.input_filters, "output_filter": self.output_filter, - "activation": self.activation, + "activation": keras.activations.serialize( + activation=self.activation + ), "inverted_res_block": self.inverted_res_block, } ) return config -class HardSigmoidActivation: - def __init__(self): - super().__init__() - - def call(self, x): - return keras.activations.hard_sigmoid(x) - - def get_config(self): - return super().get_config() - - def adjust_channels(x, divisor=8, min_value=None): """Ensure that all layers have a channel number divisible by the `divisor`. @@ -340,7 +330,7 @@ def apply_inverted_res_block( filters=se_filters, bottleneck_filters=adjust_channels(se_filters * se_ratio), squeeze_activation="relu", - excite_activation=HardSigmoidActivation(), + excite_activation=keras.activations.hard_sigmoid, ) x = keras.layers.Conv2D( From 67efa000320ae60981c71ac7d5f04a7c66b97c5b Mon Sep 17 00:00:00 2001 From: ushareng Date: Thu, 22 Aug 2024 21:52:37 +0530 Subject: [PATCH 22/28] minor bug fixed --- keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 820a3295de..2e73883470 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -197,7 +197,7 @@ def __init__( self.depth_multiplier = depth_multiplier self.input_filters = input_filters self.output_filter = output_filter - self.activation = keras.activations.get(activation=activation) + self.activation = keras.activations.get(activation) self.inverted_res_block = inverted_res_block self.image_shape = image_shape From 83abfc68feb6bb724c0646c8df25253fd6dc31fb Mon Sep 17 00:00:00 2001 From: ushareng Date: Fri, 23 Aug 2024 12:19:02 +0530 Subject: [PATCH 23/28] added vision_backbone_test --- keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 3 +++ keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 2e73883470..513e1f88f4 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -112,6 +112,9 @@ def __init__( **kwargs, ): # === Functional Model === + CHANNEL_AXIS = ( + 1 if keras.config.image_data_format == "channels_first" else -1 + ) inputs = keras.layers.Input(shape=image_shape) x = inputs diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index c5b3790366..07bbc6984f 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -40,7 +40,7 @@ def setUp(self): self.input_data = np.ones((2, 224, 224, 3), dtype="float32") def test_backbone_basics(self): - self.run_backbone_test( + self.run_vision_backbone_test( cls=MobileNetBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, From 8d0e6dcbe3b69165034f62eceea34d46b9222053 Mon Sep 17 00:00:00 2001 From: ushareng Date: Fri, 23 Aug 2024 12:43:38 +0530 Subject: [PATCH 24/28] channel_first bug resolved --- keras_nlp/src/models/mobilenet/mobilenet_backbone.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 513e1f88f4..5903610170 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -463,13 +463,16 @@ def SqueezeAndExcite2D( bottleneck_filters = filters // 4 x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input) + if CHANNEL_AXIS == 1: + x = keras.layers.Permute((2, 3, 1))(x) x = keras.layers.Conv2D( bottleneck_filters, (1, 1), activation=squeeze_activation, )(x) x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x) - + if CHANNEL_AXIS == 1: + x = keras.layers.Permute((3, 1, 2))(x) x = ops.multiply(x, input) return x From 0af04abbbd95ab643f972d50b2b7bb8563f1bc09 Mon Sep 17 00:00:00 2001 From: ushareng Date: Fri, 23 Aug 2024 16:43:01 +0530 Subject: [PATCH 25/28] channel_first cases working --- .../models/mobilenet/mobilenet_backbone.py | 35 +++++++++++++------ .../mobilenet/mobilenet_backbone_test.py | 1 + 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index 5903610170..a9b8503945 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -17,7 +17,7 @@ from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.backbone import Backbone -CHANNEL_AXIS = -1 +# CHANNEL_AXIS = -1 if keras.config.image_data_format() == "channels_last" else 1 BN_EPSILON = 1e-3 BN_MOMENTUM = 0.999 @@ -105,15 +105,15 @@ def __init__( include_rescaling, output_filter, inverted_res_block, + image_shape=(224, 224, 3), activation=keras.activations.hard_swish, depth_multiplier=1.0, input_filters=16, - image_shape=(224, 224, 3), **kwargs, ): # === Functional Model === CHANNEL_AXIS = ( - 1 if keras.config.image_data_format == "channels_first" else -1 + -1 if keras.config.image_data_format() == "channels_last" else 1 ) inputs = keras.layers.Input(shape=image_shape) @@ -128,6 +128,7 @@ def __init__( kernel_size=3, strides=(2, 2), padding="same", + data_format=keras.config.image_data_format(), use_bias=False, name="input_conv", )(x) @@ -137,7 +138,6 @@ def __init__( momentum=BN_MOMENTUM, name="input_batch_norm", )(x) - x = keras.layers.Activation( activation, )(x) @@ -176,6 +176,7 @@ def __init__( last_conv_ch, kernel_size=1, padding="same", + data_format=keras.config.image_data_format(), use_bias=False, name="output_conv", )(x) @@ -282,6 +283,9 @@ def apply_inverted_res_block( Returns: the updated input tensor. """ + CHANNEL_AXIS = ( + -1 if keras.config.image_data_format() == "channels_last" else 1 + ) activation = keras.activations.get(activation) shortcut = x prefix = "expanded_conv_" @@ -294,6 +298,7 @@ def apply_inverted_res_block( adjust_channels(infilters * expansion), kernel_size=1, padding="same", + data_format=keras.config.image_data_format(), use_bias=False, name=prefix + "expand", )(x) @@ -315,6 +320,7 @@ def apply_inverted_res_block( kernel_size, strides=stride, padding="same" if stride == 1 else "valid", + data_format=keras.config.image_data_format(), use_bias=False, name=prefix + "depthwise", )(x) @@ -340,6 +346,7 @@ def apply_inverted_res_block( filters, kernel_size=1, padding="same", + data_format=keras.config.image_data_format(), use_bias=False, name=prefix + "project", )(x) @@ -392,7 +399,9 @@ def apply_depthwise_conv_block( Returns: Output tensor of block. """ - + CHANNEL_AXIS = ( + -1 if keras.config.image_data_format() == "channels_last" else 1 + ) if stride == 2: x = keras.layers.ZeroPadding2D( padding=correct_pad_downsample(x, kernel_size), @@ -403,6 +412,7 @@ def apply_depthwise_conv_block( kernel_size, strides=stride, padding="same" if stride == 1 else "valid", + data_format=keras.config.image_data_format(), depth_multiplier=depth_multiplier, use_bias=False, name="depthwise_%d" % block_id, @@ -419,6 +429,7 @@ def apply_depthwise_conv_block( filters, kernel_size=1, padding="same", + data_format=keras.config.image_data_format(), use_bias=False, name="conv_%d" % block_id, )(x) @@ -463,16 +474,20 @@ def SqueezeAndExcite2D( bottleneck_filters = filters // 4 x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input) - if CHANNEL_AXIS == 1: - x = keras.layers.Permute((2, 3, 1))(x) + x = keras.layers.Conv2D( bottleneck_filters, (1, 1), + data_format=keras.config.image_data_format(), activation=squeeze_activation, )(x) - x = keras.layers.Conv2D(filters, (1, 1), activation=excite_activation)(x) - if CHANNEL_AXIS == 1: - x = keras.layers.Permute((3, 1, 2))(x) + x = keras.layers.Conv2D( + filters, + (1, 1), + data_format=keras.config.image_data_format(), + activation=excite_activation, + )(x) + x = ops.multiply(x, input) return x diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 07bbc6984f..542c1cc785 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -46,6 +46,7 @@ def test_backbone_basics(self): input_data=self.input_data, expected_output_shape=(2, 28, 28, 96), run_mixed_precision_check=False, + run_data_format_check=False, ) @pytest.mark.large From 27e1759b5cb0fe663b0267fdbdaac5935f973e3c Mon Sep 17 00:00:00 2001 From: ushareng Date: Sat, 24 Aug 2024 12:18:33 +0530 Subject: [PATCH 26/28] comments resolved --- .../models/mobilenet/mobilenet_backbone.py | 78 ++++++++++--------- .../mobilenet/mobilenet_backbone_test.py | 2 +- .../mobilenet_image_classifier_test.py | 4 +- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index a9b8503945..f0aeaa9ecb 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -17,7 +17,6 @@ from keras_nlp.src.api_export import keras_nlp_export from keras_nlp.src.models.backbone import Backbone -# CHANNEL_AXIS = -1 if keras.config.image_data_format() == "channels_last" else 1 BN_EPSILON = 1e-3 BN_MOMENTUM = 0.999 @@ -47,8 +46,14 @@ class MobileNetBackbone(Backbone): each inverted residual block in the model. stackwise_filters: list of ints, number of filters for each inverted residual block in the model. + stackwise_kernel_size: list of ints, kernel size for each inverted + residual block in the model. stackwise_stride: list of ints, stride length for each inverted residual block in the model. + stackwise_se_ratio: se ratio for each inverted residual block in the + model. 0 if dont want to add Squeeze and Excite layer. + stackwise_activation: list of activation functions, for each inverted + residual block in the model. include_rescaling: bool, whether to rescale the inputs. If set to True, inputs will be passed through a `Rescaling(scale=1 / 255)` layer. @@ -60,8 +65,8 @@ class MobileNetBackbone(Backbone): of filters in each layer. - If `depth_multiplier` = 1, default number of filters from the paper are used at each layer. - input_filter: number of filters in first convolution layer - output_filter: specifies whether to add conv and batch_norm in the end, + input_filters: number of filters in first convolution layer + output_filters: specifies whether to add conv and batch_norm in the end, if set to None, it will not add these layers in the end. 'None' for MobileNetV1 activation: activation function to be used @@ -85,7 +90,7 @@ class MobileNetBackbone(Backbone): stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu6", "hard_swish"], include_rescaling=False, - output_filter=1280, + output_filters=1280, activation=keras.activations.hard_swish, inverted_res_block=True, @@ -103,16 +108,16 @@ def __init__( stackwise_se_ratio, stackwise_activation, include_rescaling, - output_filter, + output_filters, inverted_res_block, image_shape=(224, 224, 3), - activation=keras.activations.hard_swish, + activation="hard_swish", depth_multiplier=1.0, input_filters=16, **kwargs, ): # === Functional Model === - CHANNEL_AXIS = ( + channel_axis = ( -1 if keras.config.image_data_format() == "channels_last" else 1 ) @@ -133,24 +138,23 @@ def __init__( name="input_conv", )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name="input_batch_norm", )(x) - x = keras.layers.Activation( - activation, - )(x) + x = keras.layers.Activation(activation)(x) for stack_index in range(len(stackwise_filters)): + filters = adjust_channels( + (stackwise_filters[stack_index]) * depth_multiplier + ) if inverted_res_block: x = apply_inverted_res_block( x, expansion=stackwise_expansion[stack_index], - filters=adjust_channels( - (stackwise_filters[stack_index]) * depth_multiplier - ), + filters=filters, kernel_size=stackwise_kernel_size[stack_index], stride=stackwise_stride[stack_index], se_ratio=(stackwise_se_ratio[stack_index]), @@ -160,17 +164,15 @@ def __init__( else: x = apply_depthwise_conv_block( x, - filters=adjust_channels( - (stackwise_filters[stack_index]) * depth_multiplier - ), + filters=filters, kernel_size=3, stride=stackwise_stride[stack_index], depth_multiplier=depth_multiplier, block_id=stack_index, ) - if output_filter is not None: - last_conv_ch = adjust_channels(x.shape[CHANNEL_AXIS] * 6) + if output_filters is not None: + last_conv_ch = adjust_channels(x.shape[channel_axis] * 6) x = keras.layers.Conv2D( last_conv_ch, @@ -181,7 +183,7 @@ def __init__( name="output_conv", )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name="output_batch_norm", @@ -200,7 +202,7 @@ def __init__( self.include_rescaling = include_rescaling self.depth_multiplier = depth_multiplier self.input_filters = input_filters - self.output_filter = output_filter + self.output_filters = output_filters self.activation = keras.activations.get(activation) self.inverted_res_block = inverted_res_block self.image_shape = image_shape @@ -219,7 +221,7 @@ def get_config(self): "image_shape": self.image_shape, "depth_multiplier": self.depth_multiplier, "input_filters": self.input_filters, - "output_filter": self.output_filter, + "output_filters": self.output_filters, "activation": keras.activations.serialize( activation=self.activation ), @@ -283,13 +285,13 @@ def apply_inverted_res_block( Returns: the updated input tensor. """ - CHANNEL_AXIS = ( + channel_axis = ( -1 if keras.config.image_data_format() == "channels_last" else 1 ) activation = keras.activations.get(activation) shortcut = x prefix = "expanded_conv_" - infilters = x.shape[CHANNEL_AXIS] + infilters = x.shape[channel_axis] if expansion_index > 0: prefix = f"expanded_conv_{expansion_index}_" @@ -303,7 +305,7 @@ def apply_inverted_res_block( name=prefix + "expand", )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name=prefix + "expand_BatchNorm", @@ -325,7 +327,7 @@ def apply_inverted_res_block( name=prefix + "depthwise", )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name=prefix + "depthwise_BatchNorm", @@ -351,7 +353,7 @@ def apply_inverted_res_block( name=prefix + "project", )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name=prefix + "project_BatchNorm", @@ -381,12 +383,13 @@ def apply_depthwise_conv_block( x: Input tensor of shape `(rows, cols, channels) filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the pointwise convolution). - depth_multiplier: controls the width of the network. - If `depth_multiplier` < 1.0, - proportionally decreases the number of filters in each layer. - - If `depth_multiplier` > 1.0, proportionally increases the number of filters - in each layer. - - If `depth_multiplier` = 1, default number of filters from the paper are - used at each layer. + depth_multiplier: controls the width of the network. + - If `depth_multiplier` < 1.0, proportionally decreases the number + of filters in each layer. + - If `depth_multiplier` > 1.0, proportionally increases the number + of filters in each layer. + - If `depth_multiplier` = 1, default number of filters from the + paper are used at each layer. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for @@ -395,11 +398,12 @@ def apply_depthwise_conv_block( block_id: Integer, a unique identification designating the block number. Input shape: - 4D tensor with shape: `(batch, rows, cols, channels)` + 4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last" + 4D tensor with shape: `(batch, channels, rows, cols)` in "channels_first" Returns: Output tensor of block. """ - CHANNEL_AXIS = ( + channel_axis = ( -1 if keras.config.image_data_format() == "channels_last" else 1 ) if stride == 2: @@ -418,7 +422,7 @@ def apply_depthwise_conv_block( name="depthwise_%d" % block_id, )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name="depthwise_BatchNorm_%d" % block_id, @@ -434,7 +438,7 @@ def apply_depthwise_conv_block( name="conv_%d" % block_id, )(x) x = keras.layers.BatchNormalization( - axis=CHANNEL_AXIS, + axis=channel_axis, epsilon=BN_EPSILON, momentum=BN_MOMENTUM, name="BatchNorm_%d" % block_id, diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index 542c1cc785..f8fdd2079e 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -30,7 +30,7 @@ def setUp(self): "stackwise_se_ratio": [0.25, None, 0.25], "stackwise_activation": ["relu", "relu", "hard_swish"], "include_rescaling": False, - "output_filter": 1280, + "output_filters": 1280, "activation": keras.activations.hard_swish, "inverted_res_block": True, "input_filters": 16, diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index 0a561056e4..46ce9c9220 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -35,8 +35,8 @@ def setUp(self): stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu", "hard_swish"], include_rescaling=False, - output_filter=1280, - activation=keras.activations.hard_swish, + output_filters=1280, + activation="hard_swish", inverted_res_block=True, input_filters=16, image_shape=(224, 224, 3), From b4bf090fb037d6e548295dfc51071f6c7f9cad05 Mon Sep 17 00:00:00 2001 From: ushareng Date: Mon, 26 Aug 2024 00:59:23 +0530 Subject: [PATCH 27/28] formatting fixed --- .../src/models/mobilenet/mobilenet_image_classifier_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index 46ce9c9220..ab393af3ec 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import keras import numpy as np import pytest From 1804ffeff6f38d1b2ba1c2071333cfc39c2ce6b0 Mon Sep 17 00:00:00 2001 From: ushareng Date: Tue, 27 Aug 2024 19:04:29 +0530 Subject: [PATCH 28/28] refactoring --- .../models/mobilenet/mobilenet_backbone.py | 79 +++++++++++-------- .../mobilenet/mobilenet_backbone_test.py | 12 +-- .../mobilenet_image_classifier_test.py | 11 +-- 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py index f0aeaa9ecb..4054b6d76f 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone.py @@ -28,7 +28,7 @@ class MobileNetBackbone(Backbone): MobileNet is a lightweight convolutional neural network (CNN) optimized for mobile and edge devices, striking a balance between accuracy and efficiency. By employing depthwise separable convolutions - and techniques like Squeeze-and-Excitation (SE) blocks in later versions, + and techniques like Squeeze-and-Excitation (SE) blocks, MobileNet models are highly suitable for real-time applications on resource-constrained devices. @@ -44,11 +44,11 @@ class MobileNetBackbone(Backbone): Args: stackwise_expansion: list of ints or floats, the expansion ratio for each inverted residual block in the model. - stackwise_filters: list of ints, number of filters for each inverted + stackwise_num_filters: list of ints, number of filters for each inverted residual block in the model. stackwise_kernel_size: list of ints, kernel size for each inverted residual block in the model. - stackwise_stride: list of ints, stride length for each inverted + stackwise_num_strides: list of ints, stride length for each inverted residual block in the model. stackwise_se_ratio: se ratio for each inverted residual block in the model. 0 if dont want to add Squeeze and Excite layer. @@ -65,11 +65,14 @@ class MobileNetBackbone(Backbone): of filters in each layer. - If `depth_multiplier` = 1, default number of filters from the paper are used at each layer. - input_filters: number of filters in first convolution layer - output_filters: specifies whether to add conv and batch_norm in the end, + input_num_filters: number of filters in first convolution layer + output_num_filters: specifies whether to add conv and batch_norm in the end, if set to None, it will not add these layers in the end. 'None' for MobileNetV1 - activation: activation function to be used + input_activation: activation function to be used in the input layer + 'hard_swish' for MobileNetV3, + 'relu6' for MobileNetV1 and MobileNetV2 + output_activation: activation function to be used in the output layer 'hard_swish' for MobileNetV3, 'relu6' for MobileNetV1 and MobileNetV2 inverted_res_block: whether to use inverted residual blocks or not, @@ -84,14 +87,15 @@ class MobileNetBackbone(Backbone): # Randomly initialized backbone with a custom config model = MobileNetBackbone( stackwise_expansion=[1, 4, 6], - stackwise_filters=[4, 8, 16], + stackwise_num_filters=[4, 8, 16], stackwise_kernel_size=[3, 3, 5], - stackwise_stride=[2, 2, 1], + stackwise_num_strides=[2, 2, 1], stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu6", "hard_swish"], include_rescaling=False, - output_filters=1280, - activation=keras.activations.hard_swish, + output_num_filters=1280, + input_activation='hard_swish', + output_activation='hard_swish', inverted_res_block=True, ) @@ -102,18 +106,19 @@ class MobileNetBackbone(Backbone): def __init__( self, stackwise_expansion, - stackwise_filters, + stackwise_num_filters, stackwise_kernel_size, - stackwise_stride, + stackwise_num_strides, stackwise_se_ratio, stackwise_activation, include_rescaling, - output_filters, + output_num_filters, inverted_res_block, image_shape=(224, 224, 3), - activation="hard_swish", + input_activation="hard_swish", + output_activation="hard_swish", depth_multiplier=1.0, - input_filters=16, + input_num_filters=16, **kwargs, ): # === Functional Model === @@ -127,9 +132,9 @@ def __init__( if include_rescaling: x = keras.layers.Rescaling(scale=1 / 255)(x) - input_filters = adjust_channels(input_filters) + input_num_filters = adjust_channels(input_num_filters) x = keras.layers.Conv2D( - input_filters, + input_num_filters, kernel_size=3, strides=(2, 2), padding="same", @@ -143,11 +148,11 @@ def __init__( momentum=BN_MOMENTUM, name="input_batch_norm", )(x) - x = keras.layers.Activation(activation)(x) + x = keras.layers.Activation(input_activation)(x) - for stack_index in range(len(stackwise_filters)): + for stack_index in range(len(stackwise_num_filters)): filters = adjust_channels( - (stackwise_filters[stack_index]) * depth_multiplier + (stackwise_num_filters[stack_index]) * depth_multiplier ) if inverted_res_block: @@ -156,7 +161,7 @@ def __init__( expansion=stackwise_expansion[stack_index], filters=filters, kernel_size=stackwise_kernel_size[stack_index], - stride=stackwise_stride[stack_index], + stride=stackwise_num_strides[stack_index], se_ratio=(stackwise_se_ratio[stack_index]), activation=stackwise_activation[stack_index], expansion_index=stack_index, @@ -166,12 +171,12 @@ def __init__( x, filters=filters, kernel_size=3, - stride=stackwise_stride[stack_index], + stride=stackwise_num_strides[stack_index], depth_multiplier=depth_multiplier, block_id=stack_index, ) - if output_filters is not None: + if output_num_filters is not None: last_conv_ch = adjust_channels(x.shape[channel_axis] * 6) x = keras.layers.Conv2D( @@ -188,22 +193,23 @@ def __init__( momentum=BN_MOMENTUM, name="output_batch_norm", )(x) - x = keras.layers.Activation(activation)(x) + x = keras.layers.Activation(output_activation)(x) super().__init__(inputs=inputs, outputs=x, **kwargs) # === Config === self.stackwise_expansion = stackwise_expansion - self.stackwise_filters = stackwise_filters + self.stackwise_num_filters = stackwise_num_filters self.stackwise_kernel_size = stackwise_kernel_size - self.stackwise_stride = stackwise_stride + self.stackwise_num_strides = stackwise_num_strides self.stackwise_se_ratio = stackwise_se_ratio self.stackwise_activation = stackwise_activation self.include_rescaling = include_rescaling self.depth_multiplier = depth_multiplier - self.input_filters = input_filters - self.output_filters = output_filters - self.activation = keras.activations.get(activation) + self.input_num_filters = input_num_filters + self.output_num_filters = output_num_filters + self.input_activation = keras.activations.get(input_activation) + self.output_activation = keras.activations.get(output_activation) self.inverted_res_block = inverted_res_block self.image_shape = image_shape @@ -212,18 +218,21 @@ def get_config(self): config.update( { "stackwise_expansion": self.stackwise_expansion, - "stackwise_filters": self.stackwise_filters, + "stackwise_num_filters": self.stackwise_num_filters, "stackwise_kernel_size": self.stackwise_kernel_size, - "stackwise_stride": self.stackwise_stride, + "stackwise_num_strides": self.stackwise_num_strides, "stackwise_se_ratio": self.stackwise_se_ratio, "stackwise_activation": self.stackwise_activation, "include_rescaling": self.include_rescaling, "image_shape": self.image_shape, "depth_multiplier": self.depth_multiplier, - "input_filters": self.input_filters, - "output_filters": self.output_filters, - "activation": keras.activations.serialize( - activation=self.activation + "input_num_filters": self.input_num_filters, + "output_num_filters": self.output_num_filters, + "input_activation": keras.activations.serialize( + activation=self.input_activation + ), + "output_activation": keras.activations.serialize( + activation=self.output_activation ), "inverted_res_block": self.inverted_res_block, } diff --git a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py index f8fdd2079e..80225abe04 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_backbone_test.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import keras import numpy as np import pytest @@ -24,16 +23,17 @@ class MobileNetBackboneTest(TestCase): def setUp(self): self.init_kwargs = { "stackwise_expansion": [1, 4, 6], - "stackwise_filters": [4, 8, 16], + "stackwise_num_filters": [4, 8, 16], "stackwise_kernel_size": [3, 3, 5], - "stackwise_stride": [2, 2, 1], + "stackwise_num_strides": [2, 2, 1], "stackwise_se_ratio": [0.25, None, 0.25], "stackwise_activation": ["relu", "relu", "hard_swish"], "include_rescaling": False, - "output_filters": 1280, - "activation": keras.activations.hard_swish, + "output_num_filters": 1280, + "input_activation": "hard_swish", + "output_activation": "hard_swish", "inverted_res_block": True, - "input_filters": 16, + "input_num_filters": 16, "image_shape": (224, 224, 3), "depth_multiplier": 1, } diff --git a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py index ab393af3ec..29d00e6d24 100644 --- a/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_nlp/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -28,16 +28,17 @@ def setUp(self): self.labels = [0, 3] self.backbone = MobileNetBackbone( stackwise_expansion=[1, 4, 6], - stackwise_filters=[4, 8, 16], + stackwise_num_filters=[4, 8, 16], stackwise_kernel_size=[3, 3, 5], - stackwise_stride=[2, 2, 1], + stackwise_num_strides=[2, 2, 1], stackwise_se_ratio=[0.25, None, 0.25], stackwise_activation=["relu", "relu", "hard_swish"], include_rescaling=False, - output_filters=1280, - activation="hard_swish", + output_num_filters=1280, + input_activation="hard_swish", + output_activation="hard_swish", inverted_res_block=True, - input_filters=16, + input_num_filters=16, image_shape=(224, 224, 3), ) self.init_kwargs = {