diff --git a/keras_hub/src/models/mobilenet/mobilenet_backbone.py b/keras_hub/src/models/mobilenet/mobilenet_backbone.py index d3aff7e9b8..545c66d441 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_backbone.py +++ b/keras_hub/src/models/mobilenet/mobilenet_backbone.py @@ -1,506 +1,514 @@ -import keras -from keras import ops - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.backbone import Backbone - -BN_EPSILON = 1e-3 -BN_MOMENTUM = 0.999 - - -@keras_hub_export("keras_hub.models.MobileNetBackbone") -class MobileNetBackbone(Backbone): - """Instantiates the MobileNet architecture. - - MobileNet is a lightweight convolutional neural network (CNN) - optimized for mobile and edge devices, striking a balance between - accuracy and efficiency. By employing depthwise separable convolutions - and techniques like Squeeze-and-Excitation (SE) blocks, - MobileNet models are highly suitable for real-time applications on - resource-constrained devices. - - References: - - [MobileNets: Efficient Convolutional Neural Networks - for Mobile Vision Applications]( - https://arxiv.org/abs/1704.04861) - - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( - https://arxiv.org/abs/1801.04381) (CVPR 2018) - - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) - (ICCV 2019) - - Args: - stackwise_expansion: list of ints or floats, the expansion ratio for - each inverted residual block in the model. - stackwise_num_filters: list of ints, number of filters for each inverted - residual block in the model. - stackwise_kernel_size: list of ints, kernel size for each inverted - residual block in the model. - stackwise_num_strides: list of ints, stride length for each inverted - residual block in the model. - stackwise_se_ratio: se ratio for each inverted residual block in the - model. 0 if dont want to add Squeeze and Excite layer. - stackwise_activation: list of activation functions, for each inverted - residual block in the model. - image_shape: optional shape tuple, defaults to (224, 224, 3). - depth_multiplier: float, controls the width of the network. - - If `depth_multiplier` < 1.0, proportionally decreases the number - of filters in each layer. - - If `depth_multiplier` > 1.0, proportionally increases the number - of filters in each layer. - - If `depth_multiplier` = 1, default number of filters from the paper - are used at each layer. - input_num_filters: number of filters in first convolution layer - output_num_filters: specifies whether to add conv and batch_norm in the end, - if set to None, it will not add these layers in the end. 
- 'None' for MobileNetV1 - input_activation: activation function to be used in the input layer - 'hard_swish' for MobileNetV3, - 'relu6' for MobileNetV1 and MobileNetV2 - output_activation: activation function to be used in the output layer - 'hard_swish' for MobileNetV3, - 'relu6' for MobileNetV1 and MobileNetV2 - inverted_res_block: whether to use inverted residual blocks or not, - 'False' for MobileNetV1, - 'True' for MobileNetV2 and MobileNetV3 - - - Example: - ```python - input_data = tf.ones(shape=(8, 224, 224, 3)) - - # Randomly initialized backbone with a custom config - model = MobileNetBackbone( - stackwise_expansion=[1, 4, 6], - stackwise_num_filters=[4, 8, 16], - stackwise_kernel_size=[3, 3, 5], - stackwise_num_strides=[2, 2, 1], - stackwise_se_ratio=[0.25, None, 0.25], - stackwise_activation=["relu", "relu6", "hard_swish"], - output_num_filters=1280, - input_activation='hard_swish', - output_activation='hard_swish', - inverted_res_block=True, - - ) - output = model(input_data) - ``` - """ - - def __init__( - self, - stackwise_expansion, - stackwise_num_filters, - stackwise_kernel_size, - stackwise_num_strides, - stackwise_se_ratio, - stackwise_activation, - output_num_filters, - inverted_res_block, - image_shape=(224, 224, 3), - input_activation="hard_swish", - output_activation="hard_swish", - depth_multiplier=1.0, - input_num_filters=16, - **kwargs, - ): - # === Functional Model === - channel_axis = ( - -1 if keras.config.image_data_format() == "channels_last" else 1 - ) - - image_input = keras.layers.Input(shape=image_shape) - x = image_input # Intermediate result. - input_num_filters = adjust_channels(input_num_filters) - x = keras.layers.Conv2D( - input_num_filters, - kernel_size=3, - strides=(2, 2), - padding="same", - data_format=keras.config.image_data_format(), - use_bias=False, - name="input_conv", - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name="input_batch_norm", - )(x) - x = keras.layers.Activation(input_activation)(x) - - for stack_index in range(len(stackwise_num_filters)): - filters = adjust_channels( - (stackwise_num_filters[stack_index]) * depth_multiplier - ) - - if inverted_res_block: - x = apply_inverted_res_block( - x, - expansion=stackwise_expansion[stack_index], - filters=filters, - kernel_size=stackwise_kernel_size[stack_index], - stride=stackwise_num_strides[stack_index], - se_ratio=(stackwise_se_ratio[stack_index]), - activation=stackwise_activation[stack_index], - expansion_index=stack_index, - ) - else: - x = apply_depthwise_conv_block( - x, - filters=filters, - kernel_size=3, - stride=stackwise_num_strides[stack_index], - depth_multiplier=depth_multiplier, - block_id=stack_index, - ) - - if output_num_filters is not None: - last_conv_ch = adjust_channels(x.shape[channel_axis] * 6) - - x = keras.layers.Conv2D( - last_conv_ch, - kernel_size=1, - padding="same", - data_format=keras.config.image_data_format(), - use_bias=False, - name="output_conv", - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name="output_batch_norm", - )(x) - x = keras.layers.Activation(output_activation)(x) - - super().__init__(inputs=image_input, outputs=x, **kwargs) - - # === Config === - self.stackwise_expansion = stackwise_expansion - self.stackwise_num_filters = stackwise_num_filters - self.stackwise_kernel_size = stackwise_kernel_size - self.stackwise_num_strides = stackwise_num_strides - self.stackwise_se_ratio = stackwise_se_ratio - 
self.stackwise_activation = stackwise_activation - self.depth_multiplier = depth_multiplier - self.input_num_filters = input_num_filters - self.output_num_filters = output_num_filters - self.input_activation = keras.activations.get(input_activation) - self.output_activation = keras.activations.get(output_activation) - self.inverted_res_block = inverted_res_block - self.image_shape = image_shape - - def get_config(self): - config = super().get_config() - config.update( - { - "stackwise_expansion": self.stackwise_expansion, - "stackwise_num_filters": self.stackwise_num_filters, - "stackwise_kernel_size": self.stackwise_kernel_size, - "stackwise_num_strides": self.stackwise_num_strides, - "stackwise_se_ratio": self.stackwise_se_ratio, - "stackwise_activation": self.stackwise_activation, - "image_shape": self.image_shape, - "depth_multiplier": self.depth_multiplier, - "input_num_filters": self.input_num_filters, - "output_num_filters": self.output_num_filters, - "input_activation": keras.activations.serialize( - activation=self.input_activation - ), - "output_activation": keras.activations.serialize( - activation=self.output_activation - ), - "inverted_res_block": self.inverted_res_block, - } - ) - return config - - -def adjust_channels(x, divisor=8, min_value=None): - """Ensure that all layers have a channel number divisible by the `divisor`. - - Args: - x: integer, input value. - divisor: integer, the value by which a channel number should be - divisible, defaults to 8. - min_value: float, optional minimum value for the new tensor. If None, - defaults to value of divisor. - - Returns: - the updated input scalar. - """ - - if min_value is None: - min_value = divisor - - new_x = max(min_value, int(x + divisor / 2) // divisor * divisor) - - # make sure that round down does not go down by more than 10%. - if new_x < 0.9 * x: - new_x += divisor - return new_x - - -def apply_inverted_res_block( - x, - expansion, - filters, - kernel_size, - stride, - se_ratio, - activation, - expansion_index, -): - """An Inverted Residual Block. - - Args: - x: input tensor. - expansion: integer, the expansion ratio, multiplied with infilters to - get the minimum value passed to adjust_channels. - filters: integer, number of filters for convolution layer. - kernel_size: integer, the kernel size for DepthWise Convolutions. - stride: integer, the stride length for DepthWise Convolutions. - se_ratio: float, ratio for bottleneck filters. Number of bottleneck - filters = filters * se_ratio. - activation: the activation layer to use. - expansion_index: integer, a unique identification if you want to use - expanded convolutions. If greater than 0, an additional Conv+BN - layer is added after the expanded convolutional layer. - - Returns: - the updated input tensor. 
- """ - channel_axis = ( - -1 if keras.config.image_data_format() == "channels_last" else 1 - ) - activation = keras.activations.get(activation) - shortcut = x - prefix = "expanded_conv_" - infilters = x.shape[channel_axis] - - if expansion_index > 0: - prefix = f"expanded_conv_{expansion_index}_" - - x = keras.layers.Conv2D( - adjust_channels(infilters * expansion), - kernel_size=1, - padding="same", - data_format=keras.config.image_data_format(), - use_bias=False, - name=prefix + "expand", - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name=prefix + "expand_BatchNorm", - )(x) - x = keras.layers.Activation(activation=activation)(x) - - if stride == 2: - x = keras.layers.ZeroPadding2D( - padding=correct_pad_downsample(x, kernel_size), - name=prefix + "depthwise_pad", - )(x) - - x = keras.layers.DepthwiseConv2D( - kernel_size, - strides=stride, - padding="same" if stride == 1 else "valid", - data_format=keras.config.image_data_format(), - use_bias=False, - name=prefix + "depthwise", - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name=prefix + "depthwise_BatchNorm", - )(x) - x = keras.layers.Activation(activation=activation)(x) - - if se_ratio: - se_filters = adjust_channels(infilters * expansion) - x = SqueezeAndExcite2D( - input=x, - filters=se_filters, - bottleneck_filters=adjust_channels(se_filters * se_ratio), - squeeze_activation="relu", - excite_activation=keras.activations.hard_sigmoid, - ) - - x = keras.layers.Conv2D( - filters, - kernel_size=1, - padding="same", - data_format=keras.config.image_data_format(), - use_bias=False, - name=prefix + "project", - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name=prefix + "project_BatchNorm", - )(x) - - if stride == 1 and infilters == filters: - x = keras.layers.Add(name=prefix + "Add")([shortcut, x]) - - return x - - -def apply_depthwise_conv_block( - x, - filters, - kernel_size=3, - depth_multiplier=1, - stride=1, - block_id=1, -): - """Adds a depthwise convolution block. - - A depthwise convolution block consists of a depthwise conv, - batch normalization, relu6, pointwise convolution, - batch normalization and relu6 activation. - - Args: - x: Input tensor of shape `(rows, cols, channels) - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the pointwise convolution). - depth_multiplier: controls the width of the network. - - If `depth_multiplier` < 1.0, proportionally decreases the number - of filters in each layer. - - If `depth_multiplier` > 1.0, proportionally increases the number - of filters in each layer. - - If `depth_multiplier` = 1, default number of filters from the - paper are used at each layer. - strides: An integer or tuple/list of 2 integers, specifying the strides - of the convolution along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. Specifying any stride value != 1 is - incompatible with specifying any `dilation_rate` value != 1. - block_id: Integer, a unique identification designating the block number. - - Input shape: - 4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last" - 4D tensor with shape: `(batch, channels, rows, cols)` in "channels_first" - Returns: - Output tensor of block. 
- """ - channel_axis = ( - -1 if keras.config.image_data_format() == "channels_last" else 1 - ) - if stride == 2: - x = keras.layers.ZeroPadding2D( - padding=correct_pad_downsample(x, kernel_size), - name="conv_pad_%d" % block_id, - )(x) - - x = keras.layers.DepthwiseConv2D( - kernel_size, - strides=stride, - padding="same" if stride == 1 else "valid", - data_format=keras.config.image_data_format(), - depth_multiplier=depth_multiplier, - use_bias=False, - name="depthwise_%d" % block_id, - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name="depthwise_BatchNorm_%d" % block_id, - )(x) - x = keras.layers.ReLU(6.0)(x) - - x = keras.layers.Conv2D( - filters, - kernel_size=1, - padding="same", - data_format=keras.config.image_data_format(), - use_bias=False, - name="conv_%d" % block_id, - )(x) - x = keras.layers.BatchNormalization( - axis=channel_axis, - epsilon=BN_EPSILON, - momentum=BN_MOMENTUM, - name="BatchNorm_%d" % block_id, - )(x) - return keras.layers.ReLU(6.0)(x) - - -def SqueezeAndExcite2D( - input, - filters, - bottleneck_filters=None, - squeeze_activation="relu", - excite_activation="sigmoid", -): - """ - Description: - This layer applies a content-aware mechanism to adaptively assign - channel-wise weights. It uses global average pooling to compress - feature maps into single values, which are then processed by - two Conv1D layers: the first reduces the dimensionality, and - the second restores it. - Args: - filters: Number of input and output filters. The number of input and - output filters is same. - bottleneck_filters: (Optional) Number of bottleneck filters. Defaults - to `0.25 * filters` - squeeze_activation: (Optional) String, callable (or - keras.layers.Layer) or keras.activations.Activation instance - denoting activation to be applied after squeeze convolution. - Defaults to `relu`. - excite_activation: (Optional) String, callable (or - keras.layers.Layer) or keras.activations.Activation instance - denoting activation to be applied after excite convolution. - Defaults to `sigmoid`. - """ - if not bottleneck_filters: - bottleneck_filters = filters // 4 - - x = keras.layers.GlobalAveragePooling2D(keepdims=True)(input) - - x = keras.layers.Conv2D( - bottleneck_filters, - (1, 1), - data_format=keras.config.image_data_format(), - activation=squeeze_activation, - )(x) - x = keras.layers.Conv2D( - filters, - (1, 1), - data_format=keras.config.image_data_format(), - activation=excite_activation, - )(x) - - x = ops.multiply(x, input) - return x - - -def correct_pad_downsample(inputs, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling. - - Args: - inputs: Input tensor. - kernel_size: An integer or tuple/list of 2 integers. - - Returns: - A tuple. - """ - img_dim = 1 - input_size = inputs.shape[img_dim : (img_dim + 2)] - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if input_size[0] is None: - adjust = (1, 1) - else: - adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) - correct = (kernel_size[0] // 2, kernel_size[1] // 2) - return ( - (correct[0] - adjust[0], correct[0]), - (correct[1] - adjust[1], correct[1]), - ) +import keras +from keras import ops + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.backbone import Backbone + +BN_EPSILON = 1e-5 +BN_MOMENTUM = 0.9 + + +@keras_hub_export("keras_hub.models.MobileNetBackbone") +class MobileNetBackbone(Backbone): + """Instantiates the MobileNet architecture. 
+
+    MobileNet is a lightweight convolutional neural network (CNN)
+    optimized for mobile and edge devices, striking a balance between
+    accuracy and efficiency. By employing depthwise separable convolutions
+    and techniques like Squeeze-and-Excitation (SE) blocks,
+    MobileNet models are highly suitable for real-time applications on
+    resource-constrained devices.
+
+    References:
+        - [MobileNets: Efficient Convolutional Neural Networks
+          for Mobile Vision Applications](
+          https://arxiv.org/abs/1704.04861)
+        - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
+          https://arxiv.org/abs/1801.04381) (CVPR 2018)
+        - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf)
+          (ICCV 2019)
+
+    Args:
+        stackwise_expansion: list of list of ints, the expanded filter count
+            for each inverted residual block, grouped by stack.
+        stackwise_num_blocks: list of ints, number of inverted residual
+            blocks per stack.
+        stackwise_num_filters: list of list of ints, number of output filters
+            for each inverted residual block, grouped by stack.
+        stackwise_kernel_size: list of list of ints, kernel size for each
+            inverted residual block, grouped by stack.
+        stackwise_num_strides: list of list of ints, stride length for each
+            inverted residual block, grouped by stack.
+        stackwise_se_ratio: list of list of floats, squeeze-and-excite ratio
+            for each inverted residual block, grouped by stack. Use `None`
+            (or 0) to skip the Squeeze-and-Excite layer for a block.
+        stackwise_activation: list of list of activation functions, one for
+            each inverted residual block, grouped by stack.
+        image_shape: optional shape tuple, defaults to (224, 224, 3).
+        input_num_filters: int, number of filters in the first convolution
+            layer.
+        output_num_filters: int or None, number of filters of the final
+            Conv2D + BatchNorm block. If `None` (as in MobileNetV1), these
+            layers are not added at the end of the network.
+        stackwise_padding: list of list of ints, padding value for each
+            inverted residual block, grouped by stack.
+        input_activation: activation function to be used in the input layer,
+            'hard_swish' for MobileNetV3,
+            'relu6' for MobileNetV1 and MobileNetV2.
+        output_activation: activation function to be used in the output layer,
+            'hard_swish' for MobileNetV3,
+            'relu6' for MobileNetV1 and MobileNetV2.
+        depthwise_filters: int, number of filters in the depthwise separable
+            convolution layer.
+        squeeze_and_excite: float or None, squeeze-and-excite ratio in the
+            depthwise layer. If `None`, no Squeeze-and-Excite block is added.
+
+    Example:
+    ```python
+    input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Randomly initialized backbone with a custom config.
+    model = MobileNetBackbone(
+        stackwise_expansion=[
+            [40, 56],
+            [64, 144, 144],
+            [72, 72],
+            [144, 288, 288],
+        ],
+        stackwise_num_blocks=[2, 3, 2, 3],
+        stackwise_num_filters=[
+            [4, 8],
+            [8, 16, 16],
+            [16, 16],
+            [24, 24, 24],
+        ],
+        stackwise_kernel_size=[[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
+        stackwise_num_strides=[[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
+        stackwise_se_ratio=[
+            [None, None],
+            [0.25, 0.25, 0.25],
+            [0.3, 0.3],
+            [0.3, 0.25, 0.25],
+        ],
+        stackwise_activation=[
+            ["relu", "relu"],
+            ["hard_swish", "hard_swish", "hard_swish"],
+            ["hard_swish", "hard_swish"],
+            ["hard_swish", "hard_swish", "hard_swish"],
+        ],
+        stackwise_padding=[[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]],
+        output_num_filters=288,
+        input_activation="hard_swish",
+        output_activation="hard_swish",
+        input_num_filters=16,
+        image_shape=(224, 224, 3),
+        depthwise_filters=8,
+        squeeze_and_excite=0.5,
+    )
+    output = model(input_data)
+    ```
+    """
+
+    def __init__(
+        self,
+        stackwise_expansion,
+        stackwise_num_blocks,
+        stackwise_num_filters,
+        stackwise_kernel_size,
+        stackwise_num_strides,
+        stackwise_se_ratio,
+        stackwise_activation,
+        stackwise_padding,
+        output_num_filters,
+        depthwise_filters,
+        squeeze_and_excite=None,
+        image_shape=(224, 224, 3),
+        input_activation="hard_swish",
+        output_activation="hard_swish",
+        input_num_filters=16,
+        **kwargs,
+    ):
+        # === Functional Model ===
+        channel_axis = (
+            -1 if keras.config.image_data_format() == "channels_last" else 1
+        )
+
+        image_input = keras.layers.Input(shape=image_shape)
+        x = image_input
+        input_num_filters = adjust_channels(input_num_filters)
+        # Explicit zero padding (integer padding of 1) followed by a "valid"
+        # convolution; Keras `Conv2D` only accepts "valid"/"same" padding.
+        x = keras.layers.ZeroPadding2D(padding=(1, 1))(x)
+        x = keras.layers.Conv2D(
+            input_num_filters,
+            kernel_size=3,
+            strides=(2, 2),
+            padding="valid",
+            data_format=keras.config.image_data_format(),
+            use_bias=False,
+            name="input_conv",
+        )(x)
+        x = keras.layers.BatchNormalization(
+            axis=channel_axis,
+            epsilon=BN_EPSILON,
+            momentum=BN_MOMENTUM,
+            name="input_batch_norm",
+        )(x)
+        x = keras.layers.Activation(input_activation)(x)
+
+        x = apply_depthwise_conv_block(
+            x, depthwise_filters, se=squeeze_and_excite, name="block_0"
+        )
+
+        for block in range(1, len(stackwise_num_blocks)):
+            for inverted_block in range(stackwise_num_blocks[block]):
+                x = apply_inverted_res_block(
+                    x,
+                    expansion=stackwise_expansion[block][inverted_block],
+                    filters=adjust_channels(
+                        stackwise_num_filters[block][inverted_block]
+                    ),
+                    kernel_size=stackwise_kernel_size[block][inverted_block],
+                    stride=stackwise_num_strides[block][inverted_block],
+                    padding=stackwise_padding[block][inverted_block],
+                    se_ratio=stackwise_se_ratio[block][inverted_block],
+                    activation=stackwise_activation[block][inverted_block],
+                    name=f"block_{block}_{inverted_block}",
+                )
+
+        if output_num_filters is not None:
+            last_conv_ch = adjust_channels(output_num_filters)
+
+            x = keras.layers.Conv2D(
+                last_conv_ch,
+                kernel_size=1,
+                padding="same",
+                data_format=keras.config.image_data_format(),
+                use_bias=False,
+                name="output_conv",
+            )(x)
+            x = keras.layers.BatchNormalization(
+                axis=channel_axis,
+                epsilon=BN_EPSILON,
+                momentum=BN_MOMENTUM,
+                name="output_batch_norm",
+            )(x)
+            x = keras.layers.Activation(output_activation)(x)
+
+        super().__init__(inputs=image_input, outputs=x, **kwargs)
+
+        # === Config ===
+        self.stackwise_expansion = stackwise_expansion
+        self.stackwise_num_blocks = stackwise_num_blocks
+        self.stackwise_num_filters = stackwise_num_filters
+        self.stackwise_kernel_size = stackwise_kernel_size
+        self.stackwise_num_strides = stackwise_num_strides
+        self.stackwise_se_ratio = stackwise_se_ratio
+        self.stackwise_activation = stackwise_activation
+        self.stackwise_padding = stackwise_padding
+        self.input_num_filters = input_num_filters
+        self.output_num_filters = output_num_filters
+        self.depthwise_filters = depthwise_filters
+        self.squeeze_and_excite = squeeze_and_excite
+        self.input_activation = keras.activations.get(input_activation)
+        self.output_activation = keras.activations.get(output_activation)
+        self.image_shape = image_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "stackwise_expansion": self.stackwise_expansion,
+                "stackwise_num_blocks": self.stackwise_num_blocks,
+                "stackwise_num_filters": self.stackwise_num_filters,
+                "stackwise_kernel_size": self.stackwise_kernel_size,
+                "stackwise_num_strides": self.stackwise_num_strides,
+                "stackwise_se_ratio": self.stackwise_se_ratio,
+                "stackwise_activation": self.stackwise_activation,
+                "stackwise_padding": self.stackwise_padding,
+                "image_shape": self.image_shape,
+                "input_num_filters": self.input_num_filters,
+                "output_num_filters": self.output_num_filters,
+                "depthwise_filters": self.depthwise_filters,
+                "squeeze_and_excite": self.squeeze_and_excite,
+                "input_activation": keras.activations.serialize(
+                    activation=self.input_activation
+                ),
+                "output_activation": keras.activations.serialize(
+                    activation=self.output_activation
+                ),
+            }
+        )
+        return config
+
+
+def adjust_channels(x, divisor=8, min_value=None):
+    """Ensure that all layers have a channel number divisible by the `divisor`.
+
+    Args:
+        x: integer, input value.
+        divisor: integer, the value by which a channel number should be
+            divisible, defaults to 8.
+        min_value: float, optional minimum value for the new tensor. If None,
+            defaults to value of divisor.
+
+    Returns:
+        the updated input scalar.
+    """
+
+    if min_value is None:
+        min_value = divisor
+
+    new_x = max(min_value, int(x + divisor / 2) // divisor * divisor)
+
+    # make sure that round down does not go down by more than 10%.
+    if new_x < 0.9 * x:
+        new_x += divisor
+    return new_x
+
+
+def apply_inverted_res_block(
+    x,
+    expansion,
+    filters,
+    kernel_size,
+    stride,
+    padding,
+    se_ratio,
+    activation,
+    name=None,
+):
+    """An Inverted Residual Block.
+
+    Args:
+        x: input tensor.
+        expansion: integer, the number of expanded (hidden) filters in the
+            block; the value is passed through `adjust_channels`.
+        filters: integer, number of filters for convolution layer.
+        kernel_size: integer, the kernel size for DepthWise Convolutions.
+        stride: integer, the stride length for DepthWise Convolutions.
+        padding: integer, zero padding applied before the depthwise
+            convolution.
+        se_ratio: float, ratio for bottleneck filters. Number of bottleneck
+            filters = filters * se_ratio.
+        activation: the activation layer to use.
+        name: string, block label.
+
+    Returns:
+        the updated input tensor.
+ """ + channel_axis = ( + -1 if keras.config.image_data_format() == "channels_last" else 1 + ) + activation = keras.activations.get(activation) + shortcut = x + infilters = x.shape[channel_axis] + expanded_channels = adjust_channels(expansion) + + x = keras.layers.Conv2D( + expanded_channels, + kernel_size=1, + padding="same", + data_format=keras.config.image_data_format(), + use_bias=False, + name=f"{name}_conv1", + )(x) + + x = keras.layers.BatchNormalization( + axis=channel_axis, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=f"{name}_bn1", + )(x) + + x = keras.layers.Activation(activation=activation)(x) + + x = keras.layers.Conv2D( + expanded_channels, + kernel_size, + strides=stride, + padding=padding, + groups=expanded_channels, + data_format=keras.config.image_data_format(), + use_bias=False, + name=f"{name}_conv2", + )(x) + x = keras.layers.BatchNormalization( + axis=channel_axis, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=f"{name}_bn2", + )(x) + + x = keras.layers.Activation(activation=activation)(x) + + if se_ratio: + se_filters = expanded_channels + x = SqueezeAndExcite2D( + input=x, + filters=se_filters, + bottleneck_filters=adjust_channels(se_filters * se_ratio), + squeeze_activation="relu", + excite_activation=keras.activations.hard_sigmoid, + name=f"{name}_se", + ) + + x = keras.layers.Conv2D( + filters, + kernel_size=1, + padding="same", + data_format=keras.config.image_data_format(), + use_bias=False, + name=f"{name}_conv3", + )(x) + x = keras.layers.BatchNormalization( + axis=channel_axis, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=f"{name}_bn3", + )(x) + + if stride == 1 and infilters == filters: + x = keras.layers.Add(name=f"{name}_add")([shortcut, x]) + return x + + +def apply_depthwise_conv_block( + x, filters, kernel_size=3, stride=1, se=True, name=None +): + """Adds a depthwise convolution block. + + A depthwise convolution block consists of a depthwise conv, + batch normalization, relu6, pointwise convolution, + batch normalization and relu6 activation. + + Args: + x: Input tensor of shape `(rows, cols, channels) + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the pointwise convolution). + strides: An integer or tuple/list of 2 integers, specifying the strides + of the convolution along the width and height. + Can be a single integer to specify the same value for + all spatial dimensions. Specifying any stride value != 1 is + incompatible with specifying any `dilation_rate` value != 1. + block_id: Integer, a unique identification designating the block number. + + Input shape: + 4D tensor with shape: `(batch, rows, cols, channels)` in "channels_last" + 4D tensor with shape: `(batch, channels, rows, cols)` in "channels_first" + Returns: + Output tensor of block. 
+ """ + channel_axis = ( + -1 if keras.config.image_data_format() == "channels_last" else 1 + ) + infilters = x.shape[channel_axis] + name = f"{name}_0" + + x = keras.layers.Conv2D( + infilters, + kernel_size, + strides=stride, + padding=(1, 1), + data_format=keras.config.image_data_format(), + groups=infilters, + use_bias=False, + name=f"{name}_conv1", + )(x) + x = keras.layers.BatchNormalization( + axis=channel_axis, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=f"{name}_bn1", + )(x) + x = keras.layers.ReLU(6.0)(x) + + if se: + x = SqueezeAndExcite2D( + input=x, + filters=infilters, + bottleneck_filters=adjust_channels(infilters * se), + squeeze_activation="relu", + excite_activation=keras.activations.hard_sigmoid, + name=f"{name}_se", + ) + + x = keras.layers.Conv2D( + filters, + kernel_size=1, + padding="same", + data_format=keras.config.image_data_format(), + use_bias=False, + name=f"{name}_conv2", + )(x) + x = keras.layers.BatchNormalization( + axis=channel_axis, + epsilon=BN_EPSILON, + momentum=BN_MOMENTUM, + name=f"{name}_bn2", + )(x) + return x + + +def SqueezeAndExcite2D( + input, + filters, + bottleneck_filters=None, + squeeze_activation="relu", + excite_activation="sigmoid", + name=None, +): + """ + Description: + This layer applies a content-aware mechanism to adaptively assign + channel-wise weights. It uses global average pooling to compress + feature maps into single values, which are then processed by + two Conv1D layers: the first reduces the dimensionality, and + the second restores it. + Args: + filters: Number of input and output filters. The number of input and + output filters is same. + bottleneck_filters: (Optional) Number of bottleneck filters. Defaults + to `0.25 * filters` + squeeze_activation: (Optional) String, callable (or + keras.layers.Layer) or keras.activations.Activation instance + denoting activation to be applied after squeeze convolution. + Defaults to `relu`. + excite_activation: (Optional) String, callable (or + keras.layers.Layer) or keras.activations.Activation instance + denoting activation to be applied after excite convolution. + Defaults to `sigmoid`. + name: Name of the layer + """ + if not bottleneck_filters: + bottleneck_filters = filters // 4 + + x =input + x = keras.layers.Conv2D( + bottleneck_filters, + (1, 1), + data_format=keras.config.image_data_format(), + activation=squeeze_activation, + name=f"{name}_conv_reduce", + )(x) + x = keras.layers.Conv2D( + filters, + (1, 1), + data_format=keras.config.image_data_format(), + activation=excite_activation, + name=f"{name}_conv_expand", + )(x) + + x = ops.multiply(x, input) + return x + + +def correct_pad_downsample(inputs, kernel_size): + """Returns a tuple for zero-padding for 2D convolution with downsampling. + + Args: + inputs: Input tensor. + kernel_size: An integer or tuple/list of 2 integers. + + Returns: + A tuple. 
+ """ + img_dim = 1 + input_size = inputs.shape[img_dim : (img_dim + 2)] + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if input_size[0] is None: + adjust = (1, 1) + else: + adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) + correct = (kernel_size[0] // 2, kernel_size[1] // 2) + return ( + (correct[0] - adjust[0], correct[0]), + (correct[1] - adjust[1], correct[1]), + ) diff --git a/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py b/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py index 24fdd0db4c..3c6024aef1 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py +++ b/keras_hub/src/models/mobilenet/mobilenet_backbone_test.py @@ -1,43 +1,62 @@ -import numpy as np -import pytest - -from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone -from keras_hub.src.tests.test_case import TestCase - - -class MobileNetBackboneTest(TestCase): - def setUp(self): - self.init_kwargs = { - "stackwise_expansion": [1, 4, 6], - "stackwise_num_filters": [4, 8, 16], - "stackwise_kernel_size": [3, 3, 5], - "stackwise_num_strides": [2, 2, 1], - "stackwise_se_ratio": [0.25, None, 0.25], - "stackwise_activation": ["relu", "relu", "hard_swish"], - "output_num_filters": 1280, - "input_activation": "hard_swish", - "output_activation": "hard_swish", - "inverted_res_block": True, - "input_num_filters": 16, - "image_shape": (224, 224, 3), - "depth_multiplier": 1, - } - self.input_data = np.ones((2, 224, 224, 3), dtype="float32") - - def test_backbone_basics(self): - self.run_vision_backbone_test( - cls=MobileNetBackbone, - init_kwargs=self.init_kwargs, - input_data=self.input_data, - expected_output_shape=(2, 28, 28, 96), - run_mixed_precision_check=False, - run_data_format_check=False, - ) - - @pytest.mark.large - def test_saved_model(self): - self.run_model_saving_test( - cls=MobileNetBackbone, - init_kwargs=self.init_kwargs, - input_data=self.input_data, - ) +import numpy as np +import pytest + +from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone +from keras_hub.src.tests.test_case import TestCase + + +class MobileNetBackboneTest(TestCase): + def setUp(self): + + self.init_kwargs = { + "stackwise_expansion": [ + [40, 56], + [64, 144, 144], + [ 72, 72], + [144, 288, 288] + ], + "stackwise_num_blocks": [2, 3, 2, 3], + "stackwise_num_filters": [4, 8, 16], + "stackwise_kernel_size": [[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]], + "stackwise_num_strides": [[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]], + "stackwise_se_ratio": [ + [None, None], + [0.25, 0.25, 0.25], + [0.3, 0.3], + [0.3, 0.25, 0.25], + ], + "stackwise_activation": [ + ["relu", "relu"], + ["hard_swish", "hard_swish", "hard_swish"], + ["hard_swish", "hard_swish"], + ["hard_swish", "hard_swish", "hard_swish"], + ], + "stackwise_padding": [[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]], + "output_num_filters": 288, + "input_activation": "hard_swish", + "output_activation": "hard_swish", + "inverted_res_block": True, + "input_num_filters": 16, + "image_shape": (224, 224, 3), + "depthwise_filters": 8, + "squeeze_and_excite": 0.5, + } + self.input_data = np.ones((2, 224, 224, 3), dtype="float32") + + def test_backbone_basics(self): + self.run_vision_backbone_test( + cls=MobileNetBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + expected_output_shape=(2, 28, 28, 96), + run_mixed_precision_check=False, + run_data_format_check=False, + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + 
cls=MobileNetBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py index 57ebd65039..cb251918a0 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -1,57 +1,71 @@ -import numpy as np -import pytest - -from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone -from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( - MobileNetImageClassifier, -) -from keras_hub.src.tests.test_case import TestCase - - -class MobileNetImageClassifierTest(TestCase): - def setUp(self): - # Setup model. - self.images = np.ones((2, 224, 224, 3), dtype="float32") - self.labels = [0, 3] - self.backbone = MobileNetBackbone( - stackwise_expansion=[1, 4, 6], - stackwise_num_filters=[4, 8, 16], - stackwise_kernel_size=[3, 3, 5], - stackwise_num_strides=[2, 2, 1], - stackwise_se_ratio=[0.25, None, 0.25], - stackwise_activation=["relu", "relu", "hard_swish"], - output_num_filters=1280, - input_activation="hard_swish", - output_activation="hard_swish", - inverted_res_block=True, - input_num_filters=16, - image_shape=(224, 224, 3), - ) - self.init_kwargs = { - "backbone": self.backbone, - "num_classes": 2, - "activation": "softmax", - } - self.train_data = ( - self.images, - self.labels, - ) - - def test_classifier_basics(self): - pytest.skip( - reason="TODO: enable after preprocessor flow is figured out" - ) - self.run_task_test( - cls=MobileNetImageClassifier, - init_kwargs=self.init_kwargs, - train_data=self.train_data, - expected_output_shape=(2, 2), - ) - - @pytest.mark.large - def test_saved_model(self): - self.run_model_saving_test( - cls=MobileNetImageClassifier, - init_kwargs=self.init_kwargs, - input_data=self.images, - ) +import numpy as np +import pytest + +from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone +from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( + MobileNetImageClassifier, +) +from keras_hub.src.tests.test_case import TestCase + + +class MobileNetImageClassifierTest(TestCase): + def setUp(self): + # Setup model. 
+        self.images = np.ones((2, 224, 224, 3), dtype="float32")
+        self.labels = [0, 3]
+        self.backbone = MobileNetBackbone(
+            stackwise_expansion=[
+                [40, 56],
+                [64, 144, 144],
+                [72, 72],
+                [144, 288, 288],
+            ],
+            stackwise_num_blocks=[2, 3, 2, 3],
+            stackwise_num_filters=[
+                [4, 8],
+                [8, 16, 16],
+                [16, 16],
+                [24, 24, 24],
+            ],
+            stackwise_kernel_size=[[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
+            stackwise_num_strides=[[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
+            stackwise_se_ratio=[
+                [None, None],
+                [0.25, 0.25, 0.25],
+                [0.3, 0.3],
+                [0.3, 0.25, 0.25],
+            ],
+            stackwise_activation=[
+                ["relu", "relu"],
+                ["hard_swish", "hard_swish", "hard_swish"],
+                ["hard_swish", "hard_swish"],
+                ["hard_swish", "hard_swish", "hard_swish"],
+            ],
+            stackwise_padding=[[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]],
+            output_num_filters=288,
+            input_activation="hard_swish",
+            output_activation="hard_swish",
+            input_num_filters=16,
+            image_shape=(224, 224, 3),
+            depthwise_filters=8,
+            squeeze_and_excite=0.5,
+        )
+        self.init_kwargs = {
+            "backbone": self.backbone,
+            "num_classes": 2,
+            "activation": "softmax",
+        }
+        self.train_data = (
+            self.images,
+            self.labels,
+        )
+
+    def test_classifier_basics(self):
+        pytest.skip(
+            reason="TODO: enable after preprocessor flow is figured out"
+        )
+        self.run_task_test(
+            cls=MobileNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            train_data=self.train_data,
+            expected_output_shape=(2, 2),
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=MobileNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
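
For reviewers who want to exercise the reworked per-stack configuration, the sketch below builds a backbone through the `keras_hub.models.MobileNetBackbone` export added in this diff and runs a forward pass. It is a minimal usage sketch, not part of the change itself: the expansion, stride, SE, and padding lists mirror the test fixture, while the filter counts are illustrative placeholders, and the commented output shape assumes this exact configuration.

```python
import numpy as np

from keras_hub.models import MobileNetBackbone

# Stack 0 entries act as placeholders: the stack loop starts at index 1, and
# the initial depthwise block is configured by `depthwise_filters` and
# `squeeze_and_excite` instead.
backbone = MobileNetBackbone(
    stackwise_expansion=[[40, 56], [64, 144, 144], [72, 72], [144, 288, 288]],
    stackwise_num_blocks=[2, 3, 2, 3],
    stackwise_num_filters=[[4, 8], [8, 16, 16], [16, 16], [24, 24, 24]],
    stackwise_kernel_size=[[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
    stackwise_num_strides=[[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
    stackwise_se_ratio=[
        [None, None],
        [0.25, 0.25, 0.25],
        [0.3, 0.3],
        [0.3, 0.25, 0.25],
    ],
    stackwise_activation=[
        ["relu", "relu"],
        ["hard_swish", "hard_swish", "hard_swish"],
        ["hard_swish", "hard_swish"],
        ["hard_swish", "hard_swish", "hard_swish"],
    ],
    stackwise_padding=[[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]],
    output_num_filters=288,
    depthwise_filters=8,
    squeeze_and_excite=0.5,
    input_num_filters=16,
    image_shape=(224, 224, 3),
)

# Forward pass: the stride-2 stem plus stride-2 blocks in stacks 1 and 3
# reduce 224 -> 112 -> 56 -> 28, and `output_num_filters` sets the channels.
images = np.ones((2, 224, 224, 3), dtype="float32")
features = backbone(images)
print(features.shape)  # e.g. (2, 28, 28, 288) for this configuration

# The backbone round-trips through its config like other keras_hub backbones.
restored = MobileNetBackbone.from_config(backbone.get_config())
```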